Passed
Push — master ( f63037...3e5c55 )
by Lars
03:29
created

UTF8   F

Complexity

Total Complexity 1544

Size/Duplication

Total Lines 11188
Duplicated Lines 0 %

Test Coverage

Coverage 83.91%

Importance

Changes 0
Metric Value
eloc 3804
dl 0
loc 11188
ccs 2784
cts 3318
cp 0.8391
rs 0.8
c 0
b 0
f 0
wmc 1544

288 Methods

Rating   Name   Duplication   Size   Complexity  
A chr_to_decimal() 0 30 6
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 22 5
A count_chars() 0 3 1
A ctype_loaded() 0 3 1
F chr() 0 88 21
A apply_padding() 0 19 5
A chr_to_int() 0 3 1
A decode_mimeheader() 0 19 6
A chunk_split() 0 3 1
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A __construct() 0 3 1
A decimal_to_chr() 0 3 1
A between() 0 24 5
A codepoints() 0 29 4
A chr_map() 0 5 1
A cleanup() 0 28 2
A char_at() 0 3 1
A chars() 0 3 1
A checkForSupport() 0 42 4
A chr_to_hex() 0 11 3
A collapse_whitespace() 0 4 1
A access() 0 11 3
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A chr_size_list() 0 22 4
A file_has_bom() 0 8 2
A filter_input() 0 9 2
C filter() 0 53 13
A getData() 0 9 2
A fix_utf8() 0 20 4
A first_char() 0 12 3
D getCharDirection() 0 114 119
A filter_var_array() 0 9 2
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A filter_var() 0 9 2
A fix_simple_utf8() 0 20 4
A fixStrCaseHelper() 0 34 5
A filter_input_array() 0 9 2
A getSupportInfo() 0 15 4
A encode_mimeheader() 0 30 5
D extract_text() 0 109 20
F encode() 0 132 39
A get_unique_string() 0 15 2
A is_bom() 0 9 3
A is_hexadecimal() 0 3 1
A has_uppercase() 0 3 1
A isBinary() 0 3 1
D is_utf8() 0 148 32
A html_escape() 0 6 1
C get_file_type() 0 87 12
C is_utf16() 0 63 16
A isHtml() 0 3 1
A isBase64() 0 3 1
A is_html() 0 12 2
A html_decode() 0 3 1
A isUtf32() 0 3 1
A is_alpha() 0 3 1
A get_random_string() 0 25 4
A isUtf8() 0 3 1
A is_serialized() 0 11 3
A is_uppercase() 0 3 1
A is_ascii() 0 7 2
A is_blank() 0 3 1
A htmlspecialchars() 0 7 3
C is_binary() 0 48 12
A intlChar_loaded() 0 3 1
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A json_loaded() 0 3 1
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 7 2
A hasBom() 0 3 1
A iconv_loaded() 0 3 2
A isAscii() 0 3 1
A is_empty() 0 3 1
A isUtf16() 0 3 1
C is_utf32() 0 63 16
A is_alphanumeric() 0 3 1
A json_decode() 0 16 3
A is_json() 0 24 6
A int_to_hex() 0 7 2
A has_lowercase() 0 3 1
A json_encode() 0 16 3
A is_base64() 0 13 4
A hex_to_int() 0 14 3
A htmlentities() 0 19 3
A hex_to_chr() 0 3 1
A isJson() 0 3 1
B file_get_contents() 0 58 10
B html_encode() 0 39 7
D html_entity_decode() 0 87 18
A str_substr_after_first_separator() 0 20 4
A max() 0 14 3
A parse_str() 0 20 5
A str_contains() 0 16 6
A str_isubstr_last() 0 16 4
A str_replace_beginning() 0 6 1
A remove_left() 0 13 2
A str_offset_exists() 0 10 2
A str_iends_with() 0 11 4
A max_chr_width() 0 8 2
A ltrim() 0 15 4
A remove_html() 0 3 1
A str_longest_common_suffix() 0 16 3
A lcword() 0 9 1
A str_pad_both() 0 5 1
A str_index_last() 0 7 1
A str_substr_last() 0 16 4
A mbstring_loaded() 0 9 3
A str_limit() 0 15 4
C normalize_encoding() 0 132 14
B rxClass() 0 39 8
A str_ensure_right() 0 7 2
A normalize_whitespace() 0 31 6
A str_starts_with() 0 11 4
A str_humanize() 0 15 1
A str_index_first() 0 7 1
A rtrim() 0 15 4
B str_longest_common_substring() 0 40 8
A regex_replace() 0 18 3
A str_iindex_first() 0 7 1
A str_isubstr_before_first_separator() 0 16 4
A replace_all() 0 7 2
A removeBOM() 0 3 1
A str_matches_pattern() 0 7 2
A str_replace_first() 0 8 2
A str_pad_right() 0 3 1
B str_pad() 0 56 11
A str_ireplace() 0 17 3
A str_replace_ending() 0 6 1
B str_contains_all() 0 22 7
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 20 4
B range() 0 46 11
A rawurldecode() 0 28 4
B str_capitalize_name_helper() 0 78 10
A normalize_msword() 0 20 4
C str_detect_encoding() 0 116 14
A spaces_to_tabs() 0 3 1
A str_istarts_with() 0 11 4
A str_replace() 0 3 1
A replace() 0 7 2
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 16 4
A lowerCaseFirst() 0 9 1
A str_split() 0 3 1
A str_ends_with_any() 0 13 4
A remove_right() 0 12 2
A remove_html_breaks() 0 3 1
A showSupport() 0 11 3
A remove_invisible_characters() 0 19 3
A single_chr_html_encode() 0 19 6
A str_replace_last() 0 8 2
A str_iindex_last() 0 7 1
A str_substr_before_last_separator() 0 20 4
A lcfirst() 0 16 2
A str_ends_with() 0 7 3
A reduce_string_array() 0 26 6
A str_longest_common_prefix() 0 16 3
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 3 1
A str_substr_first() 0 21 4
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 20 4
A str_isubstr_after_first_separator() 0 20 4
A str_sort() 0 15 3
A str_offset_get() 0 14 4
A str_capitalize_name() 0 8 1
A str_limit_after_word() 0 30 6
B lcwords() 0 45 8
A normalizeEncoding() 0 3 1
A str_dasherize() 0 3 1
A str_ensure_left() 0 7 2
C ord() 0 56 15
B str_split_pattern() 0 31 7
A str_isubstr_first() 0 21 4
A str_last_char() 0 9 3
A str_substr_before_first_separator() 0 20 4
A str_surround() 0 3 1
A str_insert() 0 12 2
B replace_diamond_question_mark() 0 45 7
A str_delimit() 0 9 1
A min() 0 14 3
A str_starts_with_any() 0 17 5
A str_istarts_with_any() 0 17 5
A str_contains_any() 0 13 4
A remove_duplicates() 0 14 4
A str_slice() 0 13 5
A str_shuffle() 0 12 2
A str_camelize() 0 26 2
B str_to_lines() 0 27 7
B substr_in_byte() 0 26 7
A strnatcasecmp() 0 5 1
A substr_left() 0 19 5
F strlen() 0 99 21
C stripos() 0 57 15
F strrchr() 0 92 20
A to_filename() 0 24 2
F utf8_decode() 0 75 16
C wordwrap() 0 53 13
A ucfirst() 0 22 3
A toUTF8() 0 3 1
A string() 0 10 1
B str_titleize_for_humans() 0 127 5
C substr_count_in_byte() 0 51 15
A strchr() 0 3 1
A strichr() 0 3 1
A strlen_in_byte() 0 16 4
A titlecase() 0 7 3
B strtolower() 0 48 10
A urldecode() 0 28 4
A strrev() 0 13 3
F substr_replace() 0 107 26
A strstr_in_byte() 0 16 5
A str_titleize() 0 38 5
A ws() 0 3 1
A toLatin1() 0 3 1
B ucwords() 0 56 11
A to_boolean() 0 33 4
D stristr() 0 63 18
A strncasecmp() 0 6 1
B strwidth() 0 40 8
A trim() 0 15 4
A str_upper_camelize() 0 3 1
A substr_compare() 0 25 6
F substr_count() 0 74 19
A strnatcmp() 0 3 2
A to_latin1() 0 3 1
A string_has_bom() 0 9 3
B strtr() 0 30 7
B strspn() 0 15 7
A strcasecmp() 0 5 1
A str_transliterate() 0 3 1
A utf8_encode() 0 32 6
A substr_iright() 0 19 5
A to_iso8859() 0 16 4
A words_limit() 0 21 5
A strip_tags() 0 11 3
A str_truncate_safe() 0 29 6
A substr_right() 0 19 5
A strrpos_in_byte() 0 16 5
F strrpos() 0 116 27
A strtocasefold() 0 20 3
A tabs_to_spaces() 0 3 1
A str_truncate() 0 20 3
F strripos() 0 91 20
A strpos_in_byte() 0 16 5
F to_ascii() 0 150 28
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 17 6
A str_snakeize() 0 38 2
F to_utf8() 0 93 32
A ucword() 0 3 1
A str_underscored() 0 3 1
A strip_whitespace() 0 7 2
A toAscii() 0 3 1
A str_upper_first() 0 3 1
A swapCase() 0 17 5
A substr_ileft() 0 19 5
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
A strtonatfold() 0 4 1
B strcspn() 0 23 7
F strstr() 0 87 19
F substr() 0 140 31
A wordwrap_per_line() 0 15 3
A strncmp() 0 6 1
A utf8_fix_win1252_chars() 0 3 1
A to_utf8_convert_helper() 0 27 5
B strtoupper() 0 48 10
B strrichr() 0 49 11
D split() 0 122 27
F strpos() 0 134 31
A strcmp() 0 6 2
A str_word_count() 0 29 5
A strripos_in_byte() 0 16 5
A str_to_binary() 0 5 1
B symfony_polyfill_used() 0 16 7
B str_to_words() 0 33 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
19
   * Bom => Byte-Length
20
   *
21
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
22
   *
23
   * @var array
24
   */
25
  private static $BOM = [
26
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
27
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
28
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
29
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
30
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
31
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
32
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
33
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
34
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
35
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
36
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'upper' => [
157
          'µ',
158
          'ſ',
159
          "\xCD\x85",
160
          'ς',
161
          'ẞ',
162
          "\xCF\x90",
163
          "\xCF\x91",
164
          "\xCF\x95",
165
          "\xCF\x96",
166
          "\xCF\xB0",
167
          "\xCF\xB1",
168
          "\xCF\xB5",
169
          "\xE1\xBA\x9B",
170
          "\xE1\xBE\xBE",
171
      ],
172
      'lower' => [
173
          'μ',
174
          's',
175
          'ι',
176
          'σ',
177
          'ß',
178
          'β',
179
          'θ',
180
          'φ',
181
          'π',
182
          'κ',
183
          'ρ',
184
          'ε',
185
          "\xE1\xB9\xA1",
186
          'ι',
187
      ],
188
  ];
189
190
191
  /**
192
   * @var array
193
   */
194
  private static $SUPPORT = [];
195
196
  /**
197
   * @var null|array
198
   */
199
  private static $UTF8_MSWORD;
200
201
  /**
202
   * @var null|array
203
   */
204
  private static $BROKEN_UTF8_FIX;
205
206
  /**
207
   * @var null|array
208
   */
209
  private static $WIN1252_TO_UTF8;
210
211
  /**
212
   * @var null|array
213
   */
214
  private static $ENCODINGS;
215
216
  /**
217
   * @var null|array
218
   */
219
  private static $ORD;
220
221
  /**
222
   * @var null|array
223
   */
224
  private static $CHR;
225
226
  /**
   * Instantiating the helper only triggers the environment support check;
   * no instance state is set up here.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
233
234
  /**
   * Fetch the single character located at a given position ($str[1] like
   * functionality), delegating the extraction to UTF8::substr().
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string Single Multi-Byte character, or an empty string when $str is empty
   *                or $pos is negative.
   */
  public static function access(string $str, int $pos): string
  {
    // an empty input or a negative position never yields a character
    if ($pos < 0 || '' === $str) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
254
255
  /**
   * Make sure the string starts with a BOM: the UTF-8 BOM from UTF8::bom()
   * is prepended unless UTF8::string_has_bom() reports one already.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The input string, with the UTF-8 BOM prepended if no BOM was detected.
   */
  public static function add_bom_to_string(string $str): string
  {
    $hasBom = self::string_has_bom($str);

    // only an explicit "false" from the detection triggers the prepend
    if ($hasBom !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
272
273
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The padding direction is derived from which of $left / $right is non-zero:
   * only $left => STR_PAD_LEFT, only $right => STR_PAD_RIGHT, otherwise STR_PAD_BOTH.
   * The actual padding is done by UTF8::str_pad() with a target length of
   * "$left + $right + length of $str".
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding now carries the documented default: a required parameter after
    // optional ones is deprecated since PHP 8.0 and made the other defaults unusable.
    $strlen = self::strlen($str, $encoding);

    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or no padding at all
      $type = STR_PAD_BOTH;
    }

    // the side that is not padded contributes 0, so one formula covers every case
    $length = $left + $right + $strlen;

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
305
306
  /**
   * Changes the case of all keys in an array, using UTF8::strtolower() or
   * UTF8::strtoupper() on each key.
   *
   * INFO: If two keys end up identical after the conversion, the later entry
   *       overwrites the earlier one.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other value
   *                     is handled like <strong>CASE_LOWER</strong>.</p>
   *
   * @return array The values of the input array with lower or uppercased keys.
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // everything except CASE_UPPER falls back to lowercasing
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $name => $item) {
      $name = $toUpper ? self::strtoupper($name) : self::strtolower($name);
      $converted[$name] = $item;
    }

    return $converted;
  }
338
339
  /**
   * Extract the part of a string that sits between two delimiters.
   *
   * The $start delimiter is searched from $offset on, the $end delimiter is
   * searched right behind the found $start delimiter. An empty string is
   * returned when one of the delimiters is not found, when nothing lies
   * between them, or when the extraction itself fails.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // first position behind the start delimiter
    $from = $startPos + self::strlen($start, $encoding);

    $endPos = self::strpos($str, $end, $from, $encoding);
    if ($endPos === false || $endPos === $from) {
      return '';
    }

    $found = self::substr($str, $from, $endPos - $from, $encoding);

    return $found === false ? '' : $found;
  }
377
378
  /**
   * Convert binary into an string.
   *
   * The bit string is converted to hexadecimal via base_convert() and then
   * packed into bytes via pack('H*').
   *
   * INFO: base_convert() drops leading zeros, so leading all-zero bytes are not
   *       part of the result and an all-zero input gives an empty string.
   * NOTE(review): the PHP manual warns that base_convert() may lose precision on
   *               large numbers, so very long bit strings are presumably not
   *               converted exactly - confirm before relying on that.
   *
   * @param mixed $bin <p>String of "1" and "0" characters; values without an
   *                   offset 0 (e.g. an empty string) result in an empty string.</p>
   *
   * @return string
   */
  public static function binary_to_str($bin): string
  {
    if (!isset($bin[0])) {
      return '';
    }

    $hex = \base_convert($bin, 2, 16);
    if ($hex === '0') {
      return '';
    }

    // pack('H*') consumes two hex digits per byte and pads a missing nibble on
    // the right, so "a" would become "\xa0". Restore the leading zero nibble
    // that base_convert() dropped, so "00001010" gives "\x0a".
    if (\strlen($hex) % 2 !== 0) {
      $hex = '0' . $hex;
    }

    return \pack('H*', $hex);
  }
398
399
  /**
   * Returns the UTF-8 Byte Order Mark Character.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark (the three bytes EF BB BF)
   */
  public static function bom(): string
  {
    return "\xEF\xBB\xBF";
  }
410
411
  /**
   * @alias of UTF8::chr_map()
   *
   * Both arguments are handed over to UTF8::chr_map() unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return string[]
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
425
426
  /**
   * Returns the character at $index, with indexes starting at 0.
   *
   * The lookup is a one-character UTF8::substr() call; whatever it returns
   * is cast to string.
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string The character at $index.
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    $char = self::substr($str, $index, 1, $encoding);

    return (string)$char;
  }
439
440
  /**
   * Returns an array consisting of the characters in the string,
   * produced by UTF8::str_split() with a chunk length of 1.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] An array of chars.
   */
  public static function chars(string $str): array
  {
    $chars = self::str_split($str, 1);

    return $chars;
  }
451
452
  /**
   * This method will auto-detect your server environment for UTF-8 support
   * and stores the findings in UTF8::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   *       Once the "already_checked_via_portable_utf8" flag is set, further calls do nothing.
   */
  public static function checkForSupport()
  {
    // the detection already ran
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    // stays an empty list unless "intl" offers transliterator_list_ids() (see below)
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    if (
        self::$SUPPORT['intl'] === true
        &&
        \function_exists('transliterator_list_ids') === true
    ) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
501
502
  /**
   * Generates a character from the given code point, UTF-8 encoded by default.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Every result is remembered in a function-local static cache, keyed by
   * "code point + encoding". Code points up to 127 come from the "chr" data
   * table, higher ones from IntlChar::chr() if available, otherwise the UTF-8
   * byte sequence is assembled from the "chr" data table. For any encoding
   * other than UTF-8 the character is passed through UTF8::encode().
   *
   * NOTE(review): null is never returned explicitly here; it can presumably only
   *               come from a missing table entry or from IntlChar::chr() - confirm.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null Multi-Byte character, returns null on failure or empty input.
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // memoized results
    static $resolved = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "UTF-8" and "CP850" are used as given, everything else gets normalized first
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $memoKey = $code_point . $encoding;
    if (isset($resolved[$memoKey]) === true) {
      return $resolved[$memoKey];
    }

    if ($code_point <= 127) {
      // use "simple"-char only until "\x80"
      if (self::$CHR === null) {
        $table = self::getData('chr');
        if ($table) {
          self::$CHR = (array)$table;
        }
      }

      $char = self::$CHR[$code_point];
    } elseif (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $char = \IntlChar::chr($code_point);
    } else {
      // fallback: assemble the UTF-8 byte sequence from the single-byte table
      if (self::$CHR === null) {
        $table = self::getData('chr');
        if ($table) {
          self::$CHR = (array)$table;
        }
      }

      $code_point = (int)$code_point;
      if ($code_point <= 0x7F) {
        // 1 byte
        $char = self::$CHR[$code_point];
      } elseif ($code_point <= 0x7FF) {
        // 2 bytes
        $char = self::$CHR[($code_point >> 6) + 0xC0]
                . self::$CHR[($code_point & 0x3F) + 0x80];
      } elseif ($code_point <= 0xFFFF) {
        // 3 bytes
        $char = self::$CHR[($code_point >> 12) + 0xE0]
                . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                . self::$CHR[($code_point & 0x3F) + 0x80];
      } else {
        // 4 bytes
        $char = self::$CHR[($code_point >> 18) + 0xF0]
                . self::$CHR[(($code_point >> 12) & 0x3F) + 0x80]
                . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                . self::$CHR[($code_point & 0x3F) + 0x80];
      }
    }

    if ($encoding !== 'UTF-8') {
      $char = self::encode($encoding, $char);
    }

    return $resolved[$memoKey] = $char;
  }
601
602
  /**
   * Applies callback to all characters of a string: the string is broken up
   * by UTF8::split() and the callback is run on each piece via array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] The outcome of callback.
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
616
617
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * The string is split by UTF8::split(); each piece is measured with the
   * native strlen(), or with UTF8::strlen_in_byte() when the
   * "mbstring_func_overload" support flag is set.
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] An array of byte lengths of each character, empty for an empty string.
   */
  public static function chr_size_list(string $str): array
  {
    if ('' === $str) {
      return [];
    }

    $chars = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // pick the measuring function once, then apply it to every character
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $byteLength = static function ($chunk) {
        return self::strlen_in_byte($chunk);
      };
    } else {
      $byteLength = '\strlen';
    }

    return \array_map($byteLength, $chars);
  }
652
653
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes are read (1 to 4); the payload bits
   * of the lead byte and of the following bytes are then combined, 6 bits per
   * following byte. A first byte that matches none of the multi-byte lead
   * patterns is returned as it is.
   *
   * NOTE(review): assumes $char holds at least as many bytes as its first byte
   *               announces - neither an empty nor a truncated input is checked here.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    $value = self::ord($char[0]);

    if (($value & 0x80) === 0) {
      // 0xxxxxxx
      return $value;
    }

    $length = 1;
    if (($value & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $value &= ~0xc0;
    } elseif (($value & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $value &= ~0xe0;
    } elseif (($value & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $value &= ~0xf0;
    }

    // append the bits of every following byte: 10xxxxxx
    for ($offset = 1; $offset < $length; ++$offset) {
      $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $value;
  }
691
692
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The code point comes from UTF8::ord() and is formatted by UTF8::int_to_hex().
   * The HTML entity "&#0;" is looked up as an empty string.
   *
   * @param string|int $char <p>The input character</p>
   * @param string     $pfix [optional]
   *
   * @return string The code point encoded as U+xxxx, or an empty string for an empty input.
   */
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ('' === $char) {
      return '';
    }

    $codePoint = self::ord($char === '&#0;' ? '' : $char);

    return self::int_to_hex($codePoint, $pfix);
  }
712
713
  /**
   * alias for "UTF8::chr_to_decimal()"
   *
   * The character is handed over unchanged and the result is returned as it is.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
726
727
  /**
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
   *
   * The chunks come from UTF8::split() and are glued together with $end, so
   * $end only appears between chunks and not behind the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string The chunked string.
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
740
741
  /**
   * Strips every byte that is not part of a UTF-8 shaped sequence from a string
   * and optionally applies further clean-up steps.
   *
   * The optional steps run in a fixed order: diamond question mark removal,
   * invisible character removal, whitespace normalization, MS Word character
   * normalization and finally BOM removal.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
   *                                              whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
   *                                              e.g.: "…" => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
   *                                              combination with $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
   *                                              invisible characters e.g.: "\0"</p>
   *
   * @return string Clean UTF-8 encoded string.
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // Based on:
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // (that variant caused connection reset problems on larger strings)
    //
    // Group 1 captures runs of UTF-8 shaped sequences, groups 2 and 3 capture
    // stray bytes. Replacing each match with "$1" keeps the runs and drops
    // the stray bytes.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // preg_replace() yields null on failure; the cast turns that into ''.
    $cleaned = (string)\preg_replace($utf8Pattern, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
809
810
  /**
   * Cleans up a string: repairs simple UTF-8 encoding errors and then runs
   * UTF8::clean() with a fixed set of options.
   *
   * The input is cast to string first; an empty string is returned as-is.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str): string
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors before the actual cleaning
    $repaired = self::fix_simple_utf8($input);

    return self::clean(
        $repaired,
        true,  // $remove_bom
        true,  // $normalize_whitespace
        false, // $normalize_msword
        true,  // $keep_non_breaking_space
        true,  // $replace_diamond_question_mark
        true   // $remove_invisible_characters
    );
  }
846
847
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * A string argument is first split via UTF8::split(); every element is then
   * mapped through UTF8::ord().
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If true, the code points are additionally mapped through
   *                                    UTF8::int_to_hex() (U+xxxx format); by default they are
   *                                    returned as integers.</p>
   *
   * @return array<int|string>
   *                           The array of code points:<br>
   *                           array<int> for $u_style === false<br>
   *                           array<string> for $u_style === true<br>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    $ordCallback = [self::class, 'ord'];
    $hexCallback = [self::class, 'int_to_hex'];

    // A plain string is broken up into its characters first.
    $chars = \is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = \array_map($ordCallback, $chars);

    if (\count($codePoints) === 0) {
      return [];
    }

    if (!$u_style) {
      return $codePoints;
    }

    return \array_map($hexCallback, $codePoints);
  }
891
892
  /**
   * Replaces every run of whitespace with a single space and trims the result.
   *
   * Runs are matched with the pattern "[[:space:]]+" via UTF8::regex_replace();
   * the outcome is trimmed with UTF8::trim(). Which multibyte whitespace
   * characters are covered depends on those two helpers.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String with a trimmed $str and condensed whitespace.
   */
  public static function collapse_whitespace(string $str): string
  {
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
907
908
  /**
   * Counts how often each character occurs in a string.
   *
   * The string is split into single characters by UTF8::split() and the
   * occurrences are tallied with array_count_values().
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed on to UTF8::split() (remove non UTF-8 chars from the string).</p>
   *
   * @return int[] An associative array of characters as keys and
   *               their count as values.
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $singleChars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($singleChars);
  }
921
922
  /**
   * Removes CSS media-query blocks ("@media ... { ... }") from a string.
   *
   * If preg_replace() fails (null), an empty string is returned because of
   * the string cast.
   *
   * @param string $str <p>The CSS input.</p>
   *
   * @return string The input without the matched media-query blocks.
   */
  public static function css_stripe_media_queries(string $str): string
  {
    // Flags: multi-line, case-insensitive, dot matches newline, ungreedy.
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $withoutMediaQueries = \preg_replace($mediaQueryPattern, '', $str);

    return (string)$withoutMediaQueries;
  }
937
938
  /**
   * Checks whether the "ctype" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function ctype_loaded(): bool
  {
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
948
949
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
   * decoded by UTF8::html_entity_decode().
   *
   * @param mixed $int <p>The decimal value that is embedded into the entity.</p>
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = "&#{$int};";

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
960
961
  /**
   * Decodes a MIME header field.
   *
   * iconv_mime_decode() is used when the iconv support flag is set; otherwise
   * the input is (for non UTF-8 encodings) run through UTF8::encode() and
   * decoded with mb_decode_mimeheader().
   *
   * @param string $str
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string|false
   *                      A decoded MIME field on success,
   *                      or false if an error occurs during the decoding.
   */
  public static function decode_mimeheader($str, $encoding = 'UTF-8')
  {
    // "UTF-8" and "CP850" are used verbatim, everything else is normalized.
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['iconv'] !== true) {
      // fallback without iconv
      $input = ($encoding != 'UTF-8') ? self::encode($encoding, $str) : $str;

      return \mb_decode_mimeheader($input);
    }

    return \iconv_mime_decode($str, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
  }
991
992
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  Unlike "UTF8::utf8_encode()", this function also tries to fix broken / double encoding,
   *        so it can be called on a string that is already UTF-8 without messing it up.
   *
   * Besides real charsets, the pseudo encodings 'JSON', 'BASE64' and
   * 'HTML-ENTITIES' are handled for both directions.
   *
   * @param string $toEncoding                  <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str                         <p>The input string</p>
   * @param bool   $autodetectFromEncoding      [optional] <p>If true (or if $fromEncoding is empty), the source
   *                                            encoding is detected via UTF8::str_detect_encoding(); a detected
   *                                            encoding replaces $fromEncoding, and if nothing is detected in
   *                                            autodetect mode, UTF8::to_utf8() is used as fallback.</p>
   * @param string $fromEncoding                [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
   *                                            An empty string will trigger the autodetect anyway.</p>
   *
   * @return string
   */
  public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
  {
    if ($str === '' || $toEncoding === '') {
      return $str;
    }

    // "UTF-8" and "CP850" are used verbatim, everything else is normalized.
    if (!\in_array($toEncoding, ['UTF-8', 'CP850'], true)) {
      $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
    }

    if ($fromEncoding && !\in_array($fromEncoding, ['UTF-8', 'CP850'], true)) {
      $fromEncoding = self::normalize_encoding($fromEncoding, null);
    }

    // nothing to do if source and target are the same
    if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Pseudo encodings: the "to" and "from" checks are intentionally
    // interleaved, their order decides what happens when both are set.
    if ($toEncoding === 'JSON') {
      return self::json_encode($str);
    }
    if ($fromEncoding === 'JSON') {
      $str = self::json_decode($str);
      $fromEncoding = '';
    }

    if ($toEncoding === 'BASE64') {
      return base64_encode($str);
    }
    if ($fromEncoding === 'BASE64') {
      $str = base64_decode($str);
      $fromEncoding = '';
    }

    if ($toEncoding === 'HTML-ENTITIES') {
      return self::html_encode($str, true, 'UTF-8');
    }
    if ($fromEncoding === 'HTML-ENTITIES') {
      $str = self::html_decode($str, ENT_COMPAT, 'UTF-8');
      $fromEncoding = '';
    }

    $detectedEncoding = false;
    if ($autodetectFromEncoding === true || !$fromEncoding) {
      $detectedEncoding = self::str_detect_encoding($str);
    }

    // DEBUG
    //var_dump($toEncoding, $fromEncoding, $detectedEncoding, $str, "\n\n");

    if ($detectedEncoding !== false) {
      $fromEncoding = $detectedEncoding;
    } elseif ($autodetectFromEncoding === true) {
      // fallback for the "autodetect"-mode
      return self::to_utf8($str);
    }

    if (!$fromEncoding || $fromEncoding === $toEncoding) {
      return $str;
    }

    if (
        $toEncoding === 'UTF-8'
        &&
        \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
    ) {
      return self::to_utf8($str);
    }

    if (
        $toEncoding === 'ISO-8859-1'
        &&
        \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
    ) {
      return self::to_iso8859($str);
    }

    // Warn (but keep going) if the target needs mbstring and it is missing.
    if (
        !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', E_USER_WARNING);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

      if ($converted) {
        return $converted;
      }
    }

    // last resort: iconv, and if that fails too the (possibly decoded) input
    $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

    return $viaIconv !== false ? $viaIconv : $str;
  }
1141
1142
  /**
   * Encodes a string as MIME header field value via iconv_mime_encode().
   *
   * The field name passed to iconv_mime_encode() is an empty string.
   *
   * @param string $str
   * @param string $fromCharset      [optional] <p>Set the input charset.</p>
   * @param string $toCharset        [optional] <p>Set the output charset.</p>
   * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
   * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
   * @param int    $indent           [optional] <p>Set the max length indent.</p>
   *
   * @return string|false
   *                      An encoded MIME field on success,
   *                      or false if an error occurs during the encoding.
   */
  public static function encode_mimeheader(
      $str,
      $fromCharset = 'UTF-8',
      $toCharset = 'UTF-8',
      $transferEncoding = 'Q',
      $linefeed = "\r\n",
      $indent = 76
  )
  {
    // "UTF-8" and "CP850" are used verbatim, everything else is normalized.
    if (!($fromCharset === 'UTF-8' || $fromCharset === 'CP850')) {
      $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
    }

    if (!($toCharset === 'UTF-8' || $toCharset === 'CP850')) {
      $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
    }

    $preferences = [
        'scheme'           => $transferEncoding,
        'line-length'      => $indent,
        'input-charset'    => $fromCharset,
        'output-charset'   => $toCharset,
        'line-break-chars' => $linefeed,
    ];

    return \iconv_mime_encode('', $str, $preferences);
  }
1185
1186
  /**
   * Creates an extract from a text; if the search-string is found, the function tries to center
   * the extract around it.
   *
   * Cut positions are taken from the next / previous space or dot, skipped
   * text is marked with $replacerForSkippedText.
   *
   * @param string   $str                    <p>The input string.</p>
   * @param string   $search                 <p>The searched string.</p>
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // characters that are stripped from the cut edges of the extract
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    // Without a search-string: cut at the first space / dot after ~$length chars.
    if (empty($search)) {

      $stringLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
      } else {
        $end = 0;
      }

      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    // With a search-string: estimate where the extract has to start so that
    // the match ends up roughly in the middle.
    $wordPos = self::stripos($str, $search, 0, $encoding);
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        // start at the last space / dot before the estimated position
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    if ($wordPos && $halfSide > 0) {
      $l = $pos_start + $length - 1;
      $realLength = self::strlen($str, $encoding);

      if ($l > $realLength) {
        $l = $realLength;
      }

      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if (!$pos_end || $pos_end <= 0) {
        // No usable end position: take everything from $pos_start on. The
        // length is measured in $encoding (like every other length in this
        // method), so the tail is not cut short for non UTF-8 encodings.
        $strSub = self::substr($str, $pos_start, $realLength, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
        } else {
          $extract = '';
        }
      } else {
        $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      }

    } else {

      // The extract starts at the beginning of the text.
      $l = $length - 1;
      $trueLength = self::strlen($str, $encoding);

      if ($l > $trueLength) {
        $l = $trueLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $strSub = self::substr($str, 0, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1307
1308
  /**
   * Reads entire file into a string and (optionally) converts the content to UTF-8.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename             <p>
   *                                            Name of the file to read.
   *                                            </p>
   * @param bool          $use_include_path     [optional] <p>
   *                                            Passed through to the native file_get_contents()
   *                                            (include path search).
   *                                            </p>
   * @param resource|null $context              [optional] <p>
   *                                            A valid context resource created with
   *                                            stream_context_create. If it is null and $timeout is
   *                                            set, a context with that "http" timeout is created.
   *                                            </p>
   * @param int|null      $offset               [optional] <p>
   *                                            The offset where the reading starts (null is treated as 0).
   *                                            </p>
   * @param int|null      $maxLength            [optional] <p>
   *                                            Maximum length of data read. The default is to read until end
   *                                            of file is reached.
   *                                            </p>
   * @param int           $timeout              <p>The time in seconds for the timeout.</p>
   *
   * @param bool          $convertToUtf8        <strong>WARNING!!!</strong> <p>Maybe you can't use this option for some
   *                                            files, because they used non default utf-8 chars. Binary files like
   *                                            images or pdf will not be converted.</p>
   * @param string        $fromEncoding         [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
   *                                            A empty string will trigger the autodetect anyway.</p>
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true,
      string $fromEncoding = ''
  )
  {
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated in newer PHP versions - confirm a replacement.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // Only build a timeout context if the caller did not bring one.
    if ($timeout && $context === null) {
      $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    // The length argument is only passed on if it was really given.
    $readArguments = [$filename, $use_include_path, $context, $offset ?? 0];
    if (\is_int($maxLength) === true) {
      $readArguments[] = $maxLength;
    }

    $data = \file_get_contents(...$readArguments);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // Binary data that is neither UTF-16 nor UTF-32 is returned untouched.
    $isPlainBinary = self::is_binary($data, true) === true
                     &&
                     self::is_utf16($data, false) === false
                     &&
                     self::is_utf32($data, false) === false;

    if (!$isPlainBinary) {
      $data = self::encode('UTF-8', $data, false, $fromEncoding);
      $data = self::cleanup($data);
    }

    return $data;
  }
1406
1407
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read with the native file_get_contents() and the
   * content is checked by UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @throws \RuntimeException if file_get_contents() returned false
   *
   * @return bool
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    if ($content !== false) {
      return self::string_has_bom($content);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1427
  /**
1428
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1429
   *
1430
   * @param mixed  $var
1431
   * @param int    $normalization_form
1432
   * @param string $leading_combining
1433
   *
1434
   * @return mixed
1435
   */
1436 43
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
1437
  {
1438 43
    switch (\gettype($var)) {
1439 43
      case 'array':
1440 6
        foreach ($var as $k => $v) {
1441
          /** @noinspection AlterInForeachInspection */
1442 6
          $var[$k] = self::filter($v, $normalization_form, $leading_combining);
1443
        }
1444 6
        break;
1445 43
      case 'object':
1446 4
        foreach ($var as $k => $v) {
1447 4
          $var->{$k} = self::filter($v, $normalization_form, $leading_combining);
1448
        }
1449 4
        break;
1450 43
      case 'string':
1451
1452 43
        if (false !== \strpos($var, "\r")) {
1453
          // Workaround https://bugs.php.net/65732
1454 3
          $var = self::normalize_line_ending($var);
1455
        }
1456
1457 43
        if (self::is_ascii($var) === false) {
1458
          /** @noinspection PhpUndefinedClassInspection */
1459 26
          if (\Normalizer::isNormalized($var, $normalization_form)) {
1460 21
            $n = '-';
1461
          } else {
1462
            /** @noinspection PhpUndefinedClassInspection */
1463 13
            $n = \Normalizer::normalize($var, $normalization_form);
1464
1465 13
            if (isset($n[0])) {
1466 7
              $var = $n;
1467
            } else {
1468 9
              $var = self::encode('UTF-8', $var, true);
1469
            }
1470
          }
1471
1472
          if (
1473 26
              $var[0] >= "\x80"
1474
              &&
1475 26
              isset($n[0], $leading_combining[0])
1476
              &&
1477 26
              \preg_match('/^\p{Mn}/u', $var)
1478
          ) {
1479
            // Prevent leading combining chars
1480
            // for NFC-safe concatenations.
1481 3
            $var = $leading_combining . $var;
1482
          }
1483
        }
1484
1485 43
        break;
1486
    }
1487
1488 43
    return $var;
1489
  }
1490
1491
  /**
   * "filter_input()"-wrapper that passes the result through UTF8::filter().
   *
   * Gets a specific external variable by name and optionally filters it.
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              Only forwarded if it was actually passed by the caller.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Call the native function with exactly the arguments the caller gave us.
    $var = \func_num_args() < 4
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1529
1530
  /**
1531
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1532
   *
1533
   * Gets external variables and optionally filters them
1534
   *
1535
   * @link  http://php.net/manual/en/function.filter-input-array.php
1536
   *
1537
   * @param int   $type       <p>
1538
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1539
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1540
   *                          <b>INPUT_ENV</b>.
1541
   *                          </p>
1542
   * @param mixed $definition [optional] <p>
1543
   *                          An array defining the arguments. A valid key is a string
1544
   *                          containing a variable name and a valid value is either a filter type, or an array
1545
   *                          optionally specifying the filter, flags and options. If the value is an
1546
   *                          array, valid keys are filter which specifies the
1547
   *                          filter type,
1548
   *                          flags which specifies any flags that apply to the
1549
   *                          filter, and options which specifies any options that
1550
   *                          apply to the filter. See the example below for a better understanding.
1551
   *                          </p>
1552
   *                          <p>
1553
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1554
   *                          input array are filtered by this filter.
1555
   *                          </p>
1556
   * @param bool  $add_empty  [optional] <p>
1557
   *                          Add missing keys as <b>NULL</b> to the return value.
1558
   *                          </p>
1559
   *
1560
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1561
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
1562
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
1563
   *               set and <b>NULL</b> if the filter fails.
1564
   */
1565
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // With only "$type" given, call the native function in its one-argument form;
    // as soon as a second argument is present, forward definition and add_empty.
    $onlyTypeGiven = \func_num_args() < 2;

    $fetched = $onlyTypeGiven
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    // The raw result is post-processed by self::filter() before it is returned.
    return self::filter($fetched);
  }
1575
1576
  /**
1577
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1578
   *
1579
   * Filters a variable with a specified filter
1580
   *
1581
   * @link  http://php.net/manual/en/function.filter-var.php
1582
   *
1583
   * @param mixed $variable <p>
1584
   *                        Value to filter.
1585
   *                        </p>
1586
   * @param int   $filter   [optional] <p>
1587
   *                        The ID of the filter to apply. The
1588
   *                        manual page lists the available filters.
1589
   *                        </p>
1590
   * @param mixed $options  [optional] <p>
1591
   *                        Associative array of options or bitwise disjunction of flags. If filter
1592
   *                        accepts options, flags can be provided in "flags" field of array. For
1593
   *                        the "callback" filter, callable type should be passed. The
1594
   *                        callback must accept one argument, the value to be filtered, and return
1595
   *                        the value after filtering/sanitizing it.
1596
   *                        </p>
1597
   *                        <p>
1598
   *                        <code>
1599
   *                        // for filters that accept options, use this format
1600
   *                        $options = array(
1601
   *                        'options' => array(
1602
   *                        'default' => 3, // value to return if the filter fails
1603
   *                        // other options here
1604
   *                        'min_range' => 0
1605
   *                        ),
1606
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1607
   *                        );
1608
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1609
   *                        // for filter that only accept flags, you can pass them directly
1610
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1611
   *                        // for filter that only accept flags, you can also pass as an array
1612
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1613
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1614
   *                        // callback validate filter
1615
   *                        function foo($value)
1616
   *                        {
1617
   *                        // Expected format: Surname, GivenNames
1618
   *                        if (strpos($value, ", ") === false) return false;
1619
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1620
   *                        $empty = (empty($surname) || empty($givennames));
1621
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1622
   *                        if ($empty || $notstrings) {
1623
   *                        return false;
1624
   *                        } else {
1625
   *                        return $value;
1626
   *                        }
1627
   *                        }
1628
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1629
   *                        </code>
1630
   *                        </p>
1631
   *
1632
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1633
   */
1634 2
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward "$options" only when the caller passed a third argument, so the
    // native function keeps its own default in the two-argument form.
    $optionsGiven = \func_num_args() >= 3;

    $filtered = $optionsGiven
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    // The raw result is post-processed by self::filter() before it is returned.
    return self::filter($filtered);
  }
1644
1645
  /**
1646
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1647
   *
1648
   * Gets multiple variables and optionally filters them
1649
   *
1650
   * @link  http://php.net/manual/en/function.filter-var-array.php
1651
   *
1652
   * @param array $data       <p>
1653
   *                          An array with string keys containing the data to filter.
1654
   *                          </p>
1655
   * @param mixed $definition [optional] <p>
1656
   *                          An array defining the arguments. A valid key is a string
1657
   *                          containing a variable name and a valid value is either a
1658
   *                          filter type, or an
1659
   *                          array optionally specifying the filter, flags and options.
1660
   *                          If the value is an array, valid keys are filter
1661
   *                          which specifies the filter type,
1662
   *                          flags which specifies any flags that apply to the
1663
   *                          filter, and options which specifies any options that
1664
   *                          apply to the filter. See the example below for a better understanding.
1665
   *                          </p>
1666
   *                          <p>
1667
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1668
   *                          input array are filtered by this filter.
1669
   *                          </p>
1670
   * @param bool  $add_empty  [optional] <p>
1671
   *                          Add missing keys as <b>NULL</b> to the return value.
1672
   *                          </p>
1673
   *
1674
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1675
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
1676
   */
1677 2
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only "$data" given, call the native function in its one-argument form;
    // as soon as a second argument is present, forward definition and add_empty.
    $onlyDataGiven = \func_num_args() < 2;

    if ($onlyDataGiven) {
      $filtered = \filter_var_array($data);
    } else {
      $filtered = \filter_var_array($data, $definition, $add_empty);
    }

    // The raw result is post-processed by self::filter() before it is returned.
    return self::filter($filtered);
  }
1687
1688
  /**
1689
   * Checks whether finfo is available on the server.
1690
   *
1691
   * @return bool
1692
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
1693
   */
1694
  public static function finfo_loaded(): bool
  {
    // The check is purely "does a class named finfo exist".
    $finfoAvailable = \class_exists('finfo');

    return $finfoAvailable;
  }
1698
1699
  /**
1700
   * Returns the first $n characters of the string.
1701
   *
1702
   * @param string $str      <p>The input string.</p>
1703
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1704
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1705
   *
1706
   * @return string
1707
   */
1708 13
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // A count below one can never select any character.
    if ($n < 1) {
      return '';
    }

    $head = self::substr($str, 0, $n, $encoding);

    // A "false" result from self::substr() is mapped to the empty string so the
    // declared string return type always holds.
    return $head === false ? '' : $head;
  }
1721
1722
  /**
1723
   * Check if the number of unicode characters is not more than the specified integer.
1724
   *
1725
   * @param string $str      The original string to be checked.
1726
   * @param int    $box_size The size in number of chars to be checked against string.
1727
   *
1728
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1729
   */
1730 2
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Length as reported by self::strlen() with its default encoding.
    $charCount = self::strlen($str);

    return $box_size >= $charCount;
  }
1734
1735
  /**
1736
   * @param string $str
1737
   * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
1738
   * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
1739
   *
1740
   * @return string
1741
   */
1742 54
  private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
  {
    // Only a strict boolean "true" selects the lower-case direction; every other
    // value (including truthy non-booleans) keeps the upper-case direction.
    $toLower = ($useLower === true);

    $upper = self::$COMMON_CASE_FOLD['upper'];
    $lower = self::$COMMON_CASE_FOLD['lower'];

    // First pass: the common case-folding table.
    if ($toLower) {
      $str = (string)\str_replace($upper, $lower, $str);
    } else {
      $str = (string)\str_replace($lower, $upper, $str);
    }

    if (!$fullCaseFold) {
      return $str;
    }

    // Second pass: the full case-folding table, loaded once per request.
    static $FULL_CASE_FOLD = null;
    if ($FULL_CASE_FOLD === null) {
      $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    // self::getData() returns false when the data file is missing. Skip the
    // second pass in that case instead of indexing into a non-array.
    if (!\is_array($FULL_CASE_FOLD) || !isset($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1])) {
      return $str;
    }

    if ($toLower) {
      return (string)\str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
    }

    return (string)\str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
  }
1777
1778
  /**
1779
   * Try to fix simple broken UTF-8 strings.
1780
   *
1781
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1782
   *
1783
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1784
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1785
   * See: http://en.wikipedia.org/wiki/Windows-1252
1786
   *
1787
   * @param string $str <p>The input string</p>
1788
   *
1789
   * @return string
1790
   */
1791 42
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Search/replace lists derived from the "utf8_fix" map, built once per request.
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        // self::getData() returns false when the data file is missing; only a
        // real array is stored so the property never holds a non-array value.
        $loaded = self::getData('utf8_fix');
        if (\is_array($loaded)) {
          self::$BROKEN_UTF8_FIX = $loaded;
        }
      }

      // Fall back to an empty map (a no-op replacement) when no data is available,
      // instead of handing a non-array to array_keys() / array_values().
      $fixMap = \is_array(self::$BROKEN_UTF8_FIX) ? self::$BROKEN_UTF8_FIX : [];

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($fixMap);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values($fixMap);
    }

    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1812
1813
  /**
1814
   * Fix a double (or multiple) encoded UTF8 string.
1815
   *
1816
   * @param string[]|string $str You can use a string or an array of strings.
1817
   *
1818
   * @return string[]|string
1819
   *                          Will return the fixed input-"array" or
1820
   *                          the fixed input-"string".
1821
   */
1822 2
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (recursively), keeping their keys.
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    $current = (string)$str;

    // Repeat the decode -> re-encode round trip until the string stops changing.
    // An empty input equals the initial "$previous", so the loop is skipped for it.
    for ($previous = ''; $previous !== $current;) {
      $previous = $current;
      $decoded = self::utf8_decode($current, true);
      $current = self::to_utf8($decoded);
    }

    return $current;
  }
1843
1844
  /**
1845
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1846
   *
1847
   * @param string $char
1848
   *
1849
   * @return string 'RTL' or 'LTR'
1850
   */
1851 2
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes as returned by the "IntlChar"-Class
      $ltrCodes = [0, 11, 12, 20];
      $rtlCodes = [1, 13, 14, 15, 21];

      if (\in_array($intlDirection, $ltrCodes, true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, $rtlCodes, true)) {
        return 'RTL';
      }

      // any other code falls through to the table lookup below
    }

    $c = static::chr_to_decimal($char);

    // Nothing outside this window is listed in the table below.
    if ($c < 0x5be || $c > 0x10b7f) {
      return 'LTR';
    }

    // Inclusive [first, last] code point ranges that are reported as 'RTL'.
    static $RTL_RANGES = [
        [0x5be, 0x5be],
        [0x5c0, 0x5c0],
        [0x5c3, 0x5c3],
        [0x5c6, 0x5c6],
        [0x5d0, 0x5ea],
        [0x5f0, 0x5f4],
        [0x608, 0x608],
        [0x60b, 0x60b],
        [0x60d, 0x60d],
        [0x61b, 0x61b],
        [0x61e, 0x64a],
        [0x66d, 0x66f],
        [0x671, 0x6d5],
        [0x6e5, 0x6e6],
        [0x6ee, 0x6ef],
        [0x6fa, 0x70d],
        [0x710, 0x710],
        [0x712, 0x72f],
        [0x74d, 0x7a5],
        [0x7b1, 0x7b1],
        [0x7c0, 0x7ea],
        [0x7f4, 0x7f5],
        [0x7fa, 0x7fa],
        [0x800, 0x815],
        [0x81a, 0x81a],
        [0x824, 0x824],
        [0x828, 0x828],
        [0x830, 0x83e],
        [0x840, 0x858],
        [0x85e, 0x85e],
        [0x200f, 0x200f],
        [0xfb1d, 0xfb1d],
        [0xfb1f, 0xfb28],
        [0xfb2a, 0xfb36],
        [0xfb38, 0xfb3c],
        [0xfb3e, 0xfb3e],
        [0xfb40, 0xfb41],
        [0xfb43, 0xfb44],
        [0xfb46, 0xfbc1],
        [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f],
        [0xfd92, 0xfdc7],
        [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74],
        [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        [0x10808, 0x10808],
        [0x1080a, 0x10835],
        [0x10837, 0x10838],
        [0x1083c, 0x1083c],
        [0x1083f, 0x10855],
        [0x10857, 0x1085f],
        [0x10900, 0x1091b],
        [0x10920, 0x10939],
        [0x1093f, 0x1093f],
        [0x10a00, 0x10a00],
        [0x10a10, 0x10a13],
        [0x10a15, 0x10a17],
        [0x10a19, 0x10a33],
        [0x10a40, 0x10a47],
        [0x10a50, 0x10a58],
        [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35],
        [0x10b40, 0x10b55],
        [0x10b58, 0x10b72],
        [0x10b78, 0x10b7f],
    ];

    foreach ($RTL_RANGES as $range) {
      if ($c >= $range[0] && $c <= $range[1]) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1966
1967
  /**
1968
   * get data from "/data/*.php"
1969
   *
1970
   * @param string $file
1971
   *
1972
   * @return mixed|false Will return false on error.
1973
   */
1974 13
  private static function getData(string $file)
  {
    // Data files live next to this class in "data/" and carry a ".php" suffix.
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    // The value handed back is whatever the included file returns.
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1984
1985
  /**
1986
   * Check for php-support.
1987
   *
1988
   * @param string|null $key
1989
   *
1990
   * @return mixed
1991
   *               Return the full support-"array", if $key === null<br>
1992
   *               return bool-value, if $key is used and available<br>
1993
   *               otherwise return <strong>null</strong>.
1994
   */
1995 26
  public static function getSupportInfo(string $key = null)
  {
    // Populate the support table on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // No key: hand back the complete table.
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown (or null-valued) keys yield null, known keys their stored value.
    return self::$SUPPORT[$key] ?? null;
  }
2011
2012
  /**
2013
   * @param string $str
2014
   * @param array  $fallback with this keys: 'ext', 'mime', 'type'
2015
   *
2016
   * @return array
2017
   *               with this keys: 'ext', 'mime', 'type'
2018
   */
2019 40
  private static function get_file_type(
      string $str,
      array $fallback = [
          'ext'  => null,
          'mime' => 'application/octet-stream',
          'type' => null,
      ]
  ): array
  {
    if ($str === '') {
      return $fallback;
    }

    // Only the first two bytes of the input are inspected.
    $magic = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($magic) !== 2) {
      return $fallback;
    }

    // Known two-byte signatures => [extension, mime type].
    //
    // The bytes are compared directly. Concatenating their decimal values (as a
    // numeric "type code") is ambiguous: e.g. the bytes 0x07 0xAD would yield
    // "7" . "173" = 7173, the same code as "GI" (71, 73), and be reported as GIF.
    static $SIGNATURES = [
        '%P'       => ['pdf', 'application/pdf'],           // 37, 80
        'MZ'       => ['exe', 'application/octet-stream'],  // 77, 90
        'MT'       => ['midi', 'audio/x-midi'],             // 77, 84
        'PK'       => ['zip', 'application/zip'],           // 80, 75
        'Ra'       => ['rar', 'application/rar'],           // 82, 97
        "\xFF\xD8" => ['jpg', 'image/jpeg'],                // 255, 216
        'GI'       => ['gif', 'image/gif'],                 // 71, 73
        'BM'       => ['bmp', 'image/bmp'],                 // 66, 77
        "\x89P"    => ['png', 'image/png'],                 // 137, 80
    ];

    if (!isset($SIGNATURES[$magic])) {
      return $fallback;
    }

    return [
        'ext'  => $SIGNATURES[$magic][0],
        'mime' => $SIGNATURES[$magic][1],
        'type' => 'binary',
    ];
  }
2108
2109
  /**
2110
   * @param int    $length        <p>Length of the random string.</p>
2111
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2112
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2113
   *
2114
   * @return string
2115
   */
2116 1
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $alphabetSize = self::strlen($possibleChars, $encoding);

    // Nothing to pick from: the result is always empty.
    if ($alphabetSize === 0) {
      return '';
    }

    $result = '';

    // Append one randomly chosen character per iteration.
    for ($picked = 0; $picked < $length; $picked++) {
      try {
        $position = \random_int(0, $alphabetSize - 1);
      } catch (\Exception $e) {
        // random_int() raised an exception: fall back to mt_rand().
        /** @noinspection RandomApiMigrationInspection */
        $position = \mt_rand(0, $alphabetSize - 1);
      }

      $result .= self::substr($possibleChars, $position, 1, $encoding);
    }

    return $result;
  }
2142
2143
  /**
2144
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2145
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2146
   *
2147
   * @return string
2148
   */
2149 1
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // Build the prefix: random number, session id, remote/server address
    // (when present in $_SERVER) and the caller-supplied extra entropy.
    $prefix = \mt_rand() . \session_id();
    $prefix .= ($_SERVER['REMOTE_ADDR'] ?? '');
    $prefix .= ($_SERVER['SERVER_ADDR'] ?? '');
    $prefix .= $entropyExtra;

    $unique = \uniqid($prefix, true);

    // Without the md5 option the raw uniqid() value (including the prefix) is returned.
    if (!$md5) {
      return $unique;
    }

    return \md5($unique . $prefix);
  }
2165
2166
  /**
2167
   * alias for "UTF8::string_has_bom()"
2168
   *
2169
   * @see        UTF8::string_has_bom()
2170
   *
2171
   * @param string $str
2172
   *
2173
   * @return bool
2174
   *
2175
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2176
   */
2177 2
  public static function hasBom(string $str): bool
  {
    // Deprecated alias: the work is done by string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2181
2182
  /**
2183
   * Returns true if the string contains a lower case char, false otherwise.
2184
   *
2185
   * @param string $str <p>The input string.</p>
2186
   *
2187
   * @return bool Whether or not the string contains a lower case character.
2188
   */
2189 47
  public static function has_lowercase(string $str): bool
  {
    // Any prefix followed by one character of the POSIX "lower" class.
    $pattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2193
2194
  /**
2195
   * Returns true if the string contains an upper case char, false otherwise.
2196
   *
2197
   * @param string $str <p>The input string.</p>
2198
   *
2199
   * @return bool Whether or not the string contains an upper case character.
2200
   */
2201 12
  public static function has_uppercase(string $str): bool
  {
    // Any prefix followed by one character of the POSIX "upper" class.
    $pattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2205
2206
  /**
2207
   * Converts a hexadecimal-value into an UTF-8 character.
2208
   *
2209
   * @param string $hexdec <p>The hexadecimal value.</p>
2210
   *
2211
   * @return string|false One single UTF-8 character.
2212
   */
2213 4
  public static function hex_to_chr(string $hexdec)
  {
    // Hex string -> number via hexdec(), then number -> character.
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2217
2218
  /**
2219
   * Converts hexadecimal U+xxxx code point representation to integer.
2220
   *
2221
   * INFO: opposite to UTF8::int_to_hex()
2222
   *
2223
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2224
   *
2225
   * @return int|false The code point, or false on failure.
2226
   */
2227 2
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // Accepted forms: "\uXXXX", "U+XXXX" or bare digits, 4 to 6 of them.
    // Only real hexadecimal digits are allowed: with a wider class such as
    // [a-z0-9], intval(..., 16) would turn "zzzz" into 0 and "abcg" into 0xabc
    // instead of reporting a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
2242
2243
  /**
2244
   * alias for "UTF8::html_entity_decode()"
2245
   *
2246
   * @see UTF8::html_entity_decode()
2247
   *
2248
   * @param string $str
2249
   * @param int    $flags
2250
   * @param string $encoding
2251
   *
2252
   * @return string
2253
   */
2254 4
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // Plain alias: all arguments are handed to html_entity_decode() unchanged.
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2258
2259
  /**
2260
   * Converts a UTF-8 string to a series of HTML numbered entities.
2261
   *
2262
   * INFO: opposite to UTF8::html_decode()
2263
   *
2264
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2265
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2266
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2267
   *
2268
   * @return string HTML numbered entities.
2269
   */
2270 13
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
      // Start at 0x80 to leave ASCII untouched, at 0x00 to encode it as well.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A convmap entry is exactly four integers: [start, end, offset, mask].
      // A trailing fifth element makes the map length invalid (not a multiple
      // of four), which mb_encode_numericentity() rejects on PHP 8.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    //
    // fallback via vanilla php
    //

    // Encode character by character and glue the pieces back together.
    return \implode(
        '',
        \array_map(
            function ($chr) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2312
2313
  /**
2314
   * UTF-8 version of html_entity_decode()
2315
   *
2316
   * The reason we are not using html_entity_decode() by itself is because
2317
   * while it is not technically correct to leave out the semicolon
2318
   * at the end of an entity most browsers will still interpret the entity
2319
   * correctly. html_entity_decode() does not convert entities without
2320
   * semicolons, so we are left with our own little solution here. Bummer.
2321
   *
2322
   * Convert all HTML entities to their applicable characters
2323
   *
2324
   * INFO: opposite to UTF8::html_encode()
2325
   *
2326
   * @link http://php.net/manual/en/function.html-entity-decode.php
2327
   *
2328
   * @param string $str      <p>
2329
   *                         The input string.
2330
   *                         </p>
2331
   * @param int    $flags    [optional] <p>
2332
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2333
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2334
   *                         <table>
2335
   *                         Available <i>flags</i> constants
2336
   *                         <tr valign="top">
2337
   *                         <td>Constant Name</td>
2338
   *                         <td>Description</td>
2339
   *                         </tr>
2340
   *                         <tr valign="top">
2341
   *                         <td><b>ENT_COMPAT</b></td>
2342
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2343
   *                         </tr>
2344
   *                         <tr valign="top">
2345
   *                         <td><b>ENT_QUOTES</b></td>
2346
   *                         <td>Will convert both double and single quotes.</td>
2347
   *                         </tr>
2348
   *                         <tr valign="top">
2349
   *                         <td><b>ENT_NOQUOTES</b></td>
2350
   *                         <td>Will leave both double and single quotes unconverted.</td>
2351
   *                         </tr>
2352
   *                         <tr valign="top">
2353
   *                         <td><b>ENT_HTML401</b></td>
2354
   *                         <td>
2355
   *                         Handle code as HTML 4.01.
2356
   *                         </td>
2357
   *                         </tr>
2358
   *                         <tr valign="top">
2359
   *                         <td><b>ENT_XML1</b></td>
2360
   *                         <td>
2361
   *                         Handle code as XML 1.
2362
   *                         </td>
2363
   *                         </tr>
2364
   *                         <tr valign="top">
2365
   *                         <td><b>ENT_XHTML</b></td>
2366
   *                         <td>
2367
   *                         Handle code as XHTML.
2368
   *                         </td>
2369
   *                         </tr>
2370
   *                         <tr valign="top">
2371
   *                         <td><b>ENT_HTML5</b></td>
2372
   *                         <td>
2373
   *                         Handle code as HTML 5.
2374
   *                         </td>
2375
   *                         </tr>
2376
   *                         </table>
2377
   *                         </p>
2378
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2379
   *
2380
   * @return string The decoded string.
2381
   */
2382 40
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // Decodes HTML entities (named and numeric) repeatedly until the string no longer changes.
    // When $flags is null, ENT_QUOTES | ENT_HTML5 is used.

    if ('' === $str) {
      return '';
    }

    // Inputs shorter than four bytes are returned untouched.
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Fast path: without "&", or without both "&#" and ";", the string is returned as-is.
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // The support map has to be initialised BEFORE the first read of
    // self::$SUPPORT['mbstring'] below; previously this check ran after that read.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    do {
      // remember the current state, so we can detect when another pass changes nothing
      $str_compare = $str;

      # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
      if (self::$SUPPORT['mbstring'] === true) {

        $str = \mb_decode_numericentity(
            $str,
            [0x80, 0xfffff, 0, 0xfffff, 0],
            $encoding
        );

      } else {

        // preg_replace_callback() returns null on a PCRE error: keep the current string in that case
        $str = \preg_replace_callback(
            "/&#\d{2,6};/",
            static function ($matches) use ($encoding) {
              // always fallback via symfony polyfill
              $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

              // quotes are left encoded here
              if ($returnTmp !== '"' && $returnTmp !== "'") {
                return $returnTmp;
              }

              return $matches[0];
            },
            $str
        ) ?? $str;

      }

      // append the missing ";" to numeric entities that lack one (null on PCRE error => keep the string)
      $strWithSemicolons = \preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      ) ?? $str;

      // decode numeric & UTF16 two byte entities
      $str = \html_entity_decode($strWithSemicolons, $flags, $encoding);

    } while ($str_compare !== $str);

    return $str;
  }
2470
2471
  /**
   * Escape a string for HTML output by delegating to "UTF8::htmlspecialchars()".
   *
   * The flags ENT_QUOTES | ENT_SUBSTITUTE are always passed along.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
  }
2487
2488
  /**
   * Strip html-tags that contain nothing (or only whitespace).
   *
   * e.g.: <tag></tag>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function html_stripe_empty_tags(string $str): string
  {
    // opening tag, optional whitespace, closing tag
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $result = \preg_replace($emptyTagPattern, '', $str);

    return (string)$result;
  }
2505
2506
  /**
2507
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2508
   *
2509
   * @link http://php.net/manual/en/function.htmlentities.php
2510
   *
2511
   * @param string $str           <p>
2512
   *                              The input string.
2513
   *                              </p>
2514
   * @param int    $flags         [optional] <p>
2515
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2516
   *                              invalid code unit sequences and the used document type. The default is
2517
   *                              ENT_COMPAT | ENT_HTML401.
2518
   *                              <table>
2519
   *                              Available <i>flags</i> constants
2520
   *                              <tr valign="top">
2521
   *                              <td>Constant Name</td>
2522
   *                              <td>Description</td>
2523
   *                              </tr>
2524
   *                              <tr valign="top">
2525
   *                              <td><b>ENT_COMPAT</b></td>
2526
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2527
   *                              </tr>
2528
   *                              <tr valign="top">
2529
   *                              <td><b>ENT_QUOTES</b></td>
2530
   *                              <td>Will convert both double and single quotes.</td>
2531
   *                              </tr>
2532
   *                              <tr valign="top">
2533
   *                              <td><b>ENT_NOQUOTES</b></td>
2534
   *                              <td>Will leave both double and single quotes unconverted.</td>
2535
   *                              </tr>
2536
   *                              <tr valign="top">
2537
   *                              <td><b>ENT_IGNORE</b></td>
2538
   *                              <td>
2539
   *                              Silently discard invalid code unit sequences instead of returning
2540
   *                              an empty string. Using this flag is discouraged as it
2541
   *                              may have security implications.
2542
   *                              </td>
2543
   *                              </tr>
2544
   *                              <tr valign="top">
2545
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2546
   *                              <td>
2547
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2548
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2549
   *                              </td>
2550
   *                              </tr>
2551
   *                              <tr valign="top">
2552
   *                              <td><b>ENT_DISALLOWED</b></td>
2553
   *                              <td>
2554
   *                              Replace invalid code points for the given document type with a
2555
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2556
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2557
   *                              instance, to ensure the well-formedness of XML documents with
2558
   *                              embedded external content.
2559
   *                              </td>
2560
   *                              </tr>
2561
   *                              <tr valign="top">
2562
   *                              <td><b>ENT_HTML401</b></td>
2563
   *                              <td>
2564
   *                              Handle code as HTML 4.01.
2565
   *                              </td>
2566
   *                              </tr>
2567
   *                              <tr valign="top">
2568
   *                              <td><b>ENT_XML1</b></td>
2569
   *                              <td>
2570
   *                              Handle code as XML 1.
2571
   *                              </td>
2572
   *                              </tr>
2573
   *                              <tr valign="top">
2574
   *                              <td><b>ENT_XHTML</b></td>
2575
   *                              <td>
2576
   *                              Handle code as XHTML.
2577
   *                              </td>
2578
   *                              </tr>
2579
   *                              <tr valign="top">
2580
   *                              <td><b>ENT_HTML5</b></td>
2581
   *                              <td>
2582
   *                              Handle code as HTML 5.
2583
   *                              </td>
2584
   *                              </tr>
2585
   *                              </table>
2586
   *                              </p>
2587
   * @param string $encoding      [optional] <p>
2588
   *                              Like <b>htmlspecialchars</b>,
2589
   *                              <b>htmlentities</b> takes an optional third argument
2590
   *                              <i>encoding</i> which defines encoding used in
2591
   *                              conversion.
2592
   *                              Although this argument is technically optional, you are highly
2593
   *                              encouraged to specify the correct value for your code.
2594
   *                              </p>
2595
   * @param bool   $double_encode [optional] <p>
2596
   *                              When <i>double_encode</i> is turned off PHP will not
2597
   *                              encode existing html entities. The default is to convert everything.
2598
   *                              </p>
2599
   *
2600
   *
2601
   * @return string The encoded string.
2602
   * </p>
2603
   * <p>
2604
   * If the input <i>string</i> contains an invalid code unit
2605
   * sequence within the given <i>encoding</i> an empty string
2606
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2607
   * <b>ENT_SUBSTITUTE</b> flags are set.
2608
   */
2609 9
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as given, everything else gets normalized first
    $needsNormalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * The native htmlentities() does not touch backslashes, so every backslash
     * is replaced by its numeric html entity here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    // final pass via "UTF8::html_encode()"
    return self::html_encode($encoded, true, $encoding);
  }
2629
2630
  /**
2631
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2632
   *
2633
   * INFO: Take a look at "UTF8::htmlentities()"
2634
   *
2635
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2636
   *
2637
   * @param string $str           <p>
2638
   *                              The string being converted.
2639
   *                              </p>
2640
   * @param int    $flags         [optional] <p>
2641
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2642
   *                              invalid code unit sequences and the used document type. The default is
2643
   *                              ENT_COMPAT | ENT_HTML401.
2644
   *                              <table>
2645
   *                              Available <i>flags</i> constants
2646
   *                              <tr valign="top">
2647
   *                              <td>Constant Name</td>
2648
   *                              <td>Description</td>
2649
   *                              </tr>
2650
   *                              <tr valign="top">
2651
   *                              <td><b>ENT_COMPAT</b></td>
2652
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2653
   *                              </tr>
2654
   *                              <tr valign="top">
2655
   *                              <td><b>ENT_QUOTES</b></td>
2656
   *                              <td>Will convert both double and single quotes.</td>
2657
   *                              </tr>
2658
   *                              <tr valign="top">
2659
   *                              <td><b>ENT_NOQUOTES</b></td>
2660
   *                              <td>Will leave both double and single quotes unconverted.</td>
2661
   *                              </tr>
2662
   *                              <tr valign="top">
2663
   *                              <td><b>ENT_IGNORE</b></td>
2664
   *                              <td>
2665
   *                              Silently discard invalid code unit sequences instead of returning
2666
   *                              an empty string. Using this flag is discouraged as it
2667
   *                              may have security implications.
2668
   *                              </td>
2669
   *                              </tr>
2670
   *                              <tr valign="top">
2671
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2672
   *                              <td>
2673
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2674
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2675
   *                              </td>
2676
   *                              </tr>
2677
   *                              <tr valign="top">
2678
   *                              <td><b>ENT_DISALLOWED</b></td>
2679
   *                              <td>
2680
   *                              Replace invalid code points for the given document type with a
2681
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2682
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2683
   *                              instance, to ensure the well-formedness of XML documents with
2684
   *                              embedded external content.
2685
   *                              </td>
2686
   *                              </tr>
2687
   *                              <tr valign="top">
2688
   *                              <td><b>ENT_HTML401</b></td>
2689
   *                              <td>
2690
   *                              Handle code as HTML 4.01.
2691
   *                              </td>
2692
   *                              </tr>
2693
   *                              <tr valign="top">
2694
   *                              <td><b>ENT_XML1</b></td>
2695
   *                              <td>
2696
   *                              Handle code as XML 1.
2697
   *                              </td>
2698
   *                              </tr>
2699
   *                              <tr valign="top">
2700
   *                              <td><b>ENT_XHTML</b></td>
2701
   *                              <td>
2702
   *                              Handle code as XHTML.
2703
   *                              </td>
2704
   *                              </tr>
2705
   *                              <tr valign="top">
2706
   *                              <td><b>ENT_HTML5</b></td>
2707
   *                              <td>
2708
   *                              Handle code as HTML 5.
2709
   *                              </td>
2710
   *                              </tr>
2711
   *                              </table>
2712
   *                              </p>
2713
   * @param string $encoding      [optional] <p>
2714
   *                              Defines encoding used in conversion.
2715
   *                              </p>
2716
   *                              <p>
2717
   *                              For the purposes of this function, the encodings
2718
   *                              ISO-8859-1, ISO-8859-15,
2719
   *                              UTF-8, cp866,
2720
   *                              cp1251, cp1252, and
2721
   *                              KOI8-R are effectively equivalent, provided the
2722
   *                              <i>string</i> itself is valid for the encoding, as
2723
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2724
   *                              the same positions in all of these encodings.
2725
   *                              </p>
2726
   * @param bool   $double_encode [optional] <p>
2727
   *                              When <i>double_encode</i> is turned off PHP will not
2728
   *                              encode existing html entities, the default is to convert everything.
2729
   *                              </p>
2730
   *
2731
   * @return string The converted string.
2732
   * </p>
2733
   * <p>
2734
   * If the input <i>string</i> contains an invalid code unit
2735
   * sequence within the given <i>encoding</i> an empty string
2736
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2737
   * <b>ENT_SUBSTITUTE</b> flags are set.
2738
   */
2739 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // "UTF-8" and "CP850" are used as given, everything else gets normalized first
    $needsNormalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hand over to the native implementation
    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2747
2748
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function iconv_loaded(): bool
  {
    if (\extension_loaded('iconv')) {
      return true;
    }

    return false;
  }
2758
2759
  /**
   * Alias that forwards to "UTF8::decimal_to_chr()".
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function int_to_chr($int): string
  {
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2772
2773
  /**
   * Converts an integer into its hexadecimal code point notation, e.g. "U+xxxx".
   *
   * The hex digits are left-padded with zeros to at least four characters.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Prefix put in front of the hex digits.</p>
   *
   * @return string The prefixed code point.
   */
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // pad to four digits; longer values are kept as they are
    $digits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2791
2792
  /**
   * Reports whether the "IntlChar" class exists.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function intlChar_loaded(): bool
  {
    $available = \class_exists('IntlChar');

    return $available;
  }
2802
2803
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function intl_loaded(): bool
  {
    $available = \extension_loaded('intl');

    return $available;
  }
2813
2814
  /**
   * Deprecated alias that forwards to "UTF8::is_ascii()".
   *
   * @see        UTF8::is_ascii()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii(string $str): bool
  {
    $result = self::is_ascii($str);

    return $result;
  }
2829
2830
  /**
   * Deprecated alias that forwards to "UTF8::is_base64()".
   *
   * @see        UTF8::is_base64()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64($str): bool
  {
    $result = self::is_base64($str);

    return $result;
  }
2845
2846
  /**
   * Deprecated alias that forwards to "UTF8::is_binary()".
   *
   * @see        UTF8::is_binary()
   *
   * @param mixed $str
   * @param bool  $strict
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str, $strict = false): bool
  {
    $result = self::is_binary($str, $strict);

    return $result;
  }
2862
2863
  /**
   * Deprecated alias that forwards to "UTF8::is_bom()".
   *
   * @see        UTF8::is_bom()
   *
   * @param string $utf8_chr
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom(string $utf8_chr): bool
  {
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2878
2879
  /**
   * Deprecated alias that forwards to "UTF8::is_html()".
   *
   * @see        UTF8::is_html()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml(string $str): bool
  {
    $result = self::is_html($str);

    return $result;
  }
2894
2895
  /**
   * Deprecated alias that forwards to "UTF8::is_json()".
   *
   * @see        UTF8::is_json()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson(string $str): bool
  {
    $result = self::is_json($str);

    return $result;
  }
2910
2911
  /**
   * Deprecated alias that forwards to "UTF8::is_utf16()".
   *
   * @see        UTF8::is_utf16()
   *
   * @param mixed $str
   *
   * @return int|false
   *                    <strong>false</strong> if it is not UTF-16,<br>
   *                    <strong>1</strong> for UTF-16LE,<br>
   *                    <strong>2</strong> for UTF-16BE.
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16($str)
  {
    $result = self::is_utf16($str);

    return $result;
  }
2929
2930
  /**
   * Deprecated alias that forwards to "UTF8::is_utf32()".
   *
   * @see        UTF8::is_utf32()
   *
   * @param mixed $str
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,
   *                   <strong>1</strong> for UTF-32LE,
   *                   <strong>2</strong> for UTF-32BE.
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32($str)
  {
    $result = self::is_utf32($str);

    return $result;
  }
2948
2949
  /**
   * Deprecated alias that forwards to "UTF8::is_utf8()".
   *
   * @see        UTF8::is_utf8()
   *
   * @param string $str
   * @param bool   $strict
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false): bool
  {
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2965
2966
  /**
   * Checks the string against the pattern "^[[:alpha:]]*$" (alphabetic chars only).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only alphabetic chars.
   */
  public static function is_alpha(string $str): bool
  {
    $pattern = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2978
2979
  /**
   * Checks the string against the pattern "^[[:alnum:]]*$" (alphabetic and numeric chars only).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only alphanumeric chars.
   */
  public static function is_alphanumeric(string $str): bool
  {
    $pattern = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
2991
2992
  /**
   * Checks if a string consists only of the accepted ASCII bytes.
   *
   * Accepted are: \x09, \x0A, \x0D, \x10, \x13 and the printable range \x20-\x7E.
   * An empty string counts as ASCII.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *              <strong>true</strong> if it is ASCII<br>
   *              <strong>false</strong> otherwise
   */
  public static function is_ascii(string $str): bool
  {
    if ($str === '') {
      return true;
    }

    // NOTE(review): "\x10" and "\x13" may have been meant as decimal 10 / 13 (LF / CR) - confirm before changing.
    $hasForbiddenByte = (bool)\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasForbiddenByte;
  }
3010
3011
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input is decoded in strict mode and encoded again; it only counts as
   * base64 if that round trip reproduces the input exactly.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is base64 encoded.
   */
  public static function is_base64($str): bool
  {
    // the parameter is untyped, so non-strings (and the empty string) are rejected here
    if (\is_string($str) === false || $str === '') {
      return false;
    }

    $decoded = \base64_decode($str, true);

    // Compare strictly: a plain truthiness check would wrongly reject
    // payloads like "0" (encoded as "MA==").
    if ($decoded === false || $decoded === '') {
      return false;
    }

    return \base64_encode($decoded) === $str;
  }
3032
3033
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * The checks run in this order and the first hit wins:
   * 1. only "0" / "1" characters,
   * 2. "UTF8::get_file_type()" reports the type "binary",
   * 3. the share of NUL bytes is above 0.256,
   * 4. (only if $strict is true) finfo reports the mime encoding "binary".
   *
   * @param mixed $input  <p>The input, it is cast to string.</p>
   * @param bool  $strict <p>Also ask ext-fileinfo.</p>
   *
   * @throws \RuntimeException if $strict is true and ext-fileinfo is not available
   *
   * @return bool
   */
  public static function is_binary($input, bool $strict = false): bool
  {
    $data = (string)$input;
    if ($data === '') {
      return false;
    }

    // a string of "0" and "1" only
    if (\preg_match('~^[01]+$~', $data)) {
      return true;
    }

    $fileType = self::get_file_type($data);
    if ($fileType['type'] === 'binary') {
      return true;
    }

    $byteLength = self::strlen_in_byte($data);
    if ($byteLength) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // ratio of NUL bytes in the input
      $nullByteCount = self::substr_count_in_byte($data, "\x0", 0, $byteLength);
      if (($nullByteCount / $byteLength) > 0.256) {
        return true;
      }
    }

    if ($strict !== true) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['finfo'] === false) {
      throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    $fileInfo = new \finfo(FILEINFO_MIME_ENCODING);

    return $fileInfo->buffer($data) === 'binary';
  }
3090
3091
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to
   * "UTF8::is_binary()" in strict mode. If nothing could be read
   * (block stays an empty string), false is returned.
   *
   * @param string $file <p>Path that is handed to fopen().</p>
   *
   * @return bool
   */
  public static function is_binary_file($file): bool
  {
    $head = '';

    $handle = \fopen($file, 'rb');
    if (\is_resource($handle)) {
      $head = \fread($handle, 512);
      \fclose($handle);
    }

    return $head === '' ? false : self::is_binary($head, true);
  }
3115
3116
  /**
   * Checks the string against the pattern "^[[:space:]]*$" (whitespace chars only).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only whitespace characters.
   */
  public static function is_blank(string $str): bool
  {
    $pattern = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
3128
3129
  /**
   * Checks if the given string is identical to one of the keys of the "Byte Order Mark" list.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
   */
  public static function is_bom($str): bool
  {
    // the BOM strings are the keys of self::$BOM, the values are not needed here
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
3149
3150
  /**
   * Determine whether the given value is considered to be empty.
   *
   * A value is considered empty if it equals FALSE when converted to bool
   * (e.g. "", "0", 0, null, []).
   *
   * @param mixed $str
   *
   * @return bool Whether or not $str is empty.
   */
  public static function is_empty($str): bool
  {
    // for an existing variable, empty($var) is the same as !$var
    return !$str;
  }
3164
3165
  /**
   * Checks the string against the pattern "^[[:xdigit:]]*$" (hexadecimal chars only).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only hexadecimal chars.
   */
  public static function is_hexadecimal(string $str): bool
  {
    $pattern = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $pattern);
  }
3177
3178
  /**
   * Check if the string contains at least one html-tag, e.g. "<lall>".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // opening or closing tag, optionally with attributes and a self-closing slash
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // 1 => a tag was found; 0 (no match) and false (error) both mean "no html"
    return \preg_match($tagPattern, $str) === 1;
  }
3198
3199
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that decodes (via "UTF8::json_decode()") to an object or an array,
   * without a json error, is accepted.
   *
   * @param string $str <p>The input string.</p>
   *
   * @throws \RuntimeException if ext-json is not available
   *
   * @return bool
   */
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // anything that is neither an object nor an array is rejected
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3231
3232
  /**
   * Returns true if the string contains only lower case chars, false
   * otherwise.
   *
   * The check is delegated to str_matches_pattern() with the POSIX class
   * "[[:lower:]]" anchored to both ends of the input, mirroring
   * UTF8::is_uppercase().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only lower case characters.
   */
  public static function is_lowercase(string $str): bool
  {
    return self::str_matches_pattern($str, '^[[:lower:]]*$');
  }
3245
3246
  /**
   * Returns true if the string is serialized, false otherwise.
   *
   * The string is handed to unserialize() and counts as serialized when the
   * call does not fail. Object instantiation is disabled for this probe, so
   * a crafted payload cannot trigger class autoloading or magic methods just
   * by being tested.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is serialized (always false for an empty string).
   */
  public static function is_serialized(string $str): bool
  {
    if ('' === $str) {
      return false;
    }

    // "b:0;" is a serialized FALSE: unserialize() would return false for it,
    // which is indistinguishable from a failure, so it is handled up front.
    if ($str === 'b:0;') {
      return true;
    }

    // SECURITY: 'allowed_classes' => false turns serialized objects into
    // __PHP_Incomplete_Class instead of instantiating them; the result is
    // still "!== false", so the return value is unaffected.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3265
3266
  /**
   * Returns true if the string contains only upper case chars, false
   * otherwise.
   *
   * The check is delegated to str_matches_pattern() with the POSIX class
   * "[[:upper:]]" anchored to both ends of the input.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only upper case characters.
   */
  public static function is_uppercase(string $str): bool
  {
    $upperOnlyPattern = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $upperOnlyPattern);
  }
3279
3280
  /**
   * Check if the string is UTF-16.
   *
   * Heuristic: after removing a BOM, the input is converted to UTF-8 and
   * round-tripped once for each byte order (LE and BE). Each byte order
   * that survives the round trip unchanged gets a score from comparing the
   * character statistics of the converted text with those of the input; the
   * byte order with the higher score wins. Equal scores mean "not UTF-16".
   *
   * @param mixed $str                   <p>The input string (cast to string).</p>
   * @param bool  $checkIfStringIsBinary <p>When true, input that UTF8::is_binary() does not
   *                                     report as binary is rejected immediately.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it's not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   */
  public static function is_utf16($str, $checkIfStringIsBinary = true)
  {
    // init
    $str = (string)$str;

    if (
        $checkIfStringIsBinary === true
        &&
        self::is_binary($str, true) === false
    ) {
      return false;
    }

    // the support table must be populated before it is read (same guard as
    // in the other methods of this class that consult self::$SUPPORT)
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // character statistics of the input, computed lazily and shared by both runs
    $strChars = [];

    // Scores one byte order: 0 when the conversion fails or does not survive
    // the round trip, otherwise the number of matching characters.
    $scoreByteOrder = static function (string $encoding) use ($str, &$strChars): int {
      $hits = 0;

      $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$test) {
        return $hits;
      }

      $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
      $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
      if ($test3 !== $test) {
        return $hits;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): the *key* of the converted statistics is searched among
      // the *values* of the input statistics; kept as-is because the detection
      // result depends on it — confirm against count_chars() whether an
      // isset()-style key lookup was intended.
      foreach (self::count_chars($test3, true) as $test3char => $test3charEmpty) {
        if (\in_array($test3char, $strChars, true) === true) {
          $hits++;
        }
      }

      return $hits;
    };

    $maybeUTF16LE = $scoreByteOrder('UTF-16LE');
    $maybeUTF16BE = $scoreByteOrder('UTF-16BE');

    if ($maybeUTF16LE === $maybeUTF16BE) {
      return false;
    }

    return $maybeUTF16LE > $maybeUTF16BE ? 1 : 2;
  }
3355
3356
  /**
   * Check if the string is UTF-32.
   *
   * Heuristic: after removing a BOM, the input is converted to UTF-8 and
   * round-tripped once for each byte order (LE and BE). Each byte order
   * that survives the round trip unchanged gets a score from comparing the
   * character statistics of the converted text with those of the input; the
   * byte order with the higher score wins. Equal scores mean "not UTF-32".
   *
   * @param mixed $str                   <p>The input string (cast to string).</p>
   * @param bool  $checkIfStringIsBinary <p>When true, input that UTF8::is_binary() does not
   *                                     report as binary is rejected immediately.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it's not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   */
  public static function is_utf32($str, $checkIfStringIsBinary = true)
  {
    // init
    $str = (string)$str;

    if (
        $checkIfStringIsBinary === true
        &&
        self::is_binary($str, true) === false
    ) {
      return false;
    }

    // the support table must be populated before it is read (same guard as
    // in the other methods of this class that consult self::$SUPPORT)
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // character statistics of the input, computed lazily and shared by both runs
    $strChars = [];

    // Scores one byte order: 0 when the conversion fails or does not survive
    // the round trip, otherwise the number of matching characters.
    $scoreByteOrder = static function (string $encoding) use ($str, &$strChars): int {
      $hits = 0;

      $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$test) {
        return $hits;
      }

      $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
      $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
      if ($test3 !== $test) {
        return $hits;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      // NOTE(review): the *key* of the converted statistics is searched among
      // the *values* of the input statistics; kept as-is because the detection
      // result depends on it — confirm against count_chars() whether an
      // isset()-style key lookup was intended.
      foreach (self::count_chars($test3, true) as $test3char => $test3charEmpty) {
        if (\in_array($test3char, $strChars, true) === true) {
          $hits++;
        }
      }

      return $hits;
    };

    $maybeUTF32LE = $scoreByteOrder('UTF-32LE');
    $maybeUTF32BE = $scoreByteOrder('UTF-32BE');

    if ($maybeUTF32LE === $maybeUTF32BE) {
      return false;
    }

    return $maybeUTF32LE > $maybeUTF32BE ? 1 : 2;
  }
3431
3432
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * When PCRE can handle the /u modifier the check is a single preg_match()
   * call; otherwise the bytes are walked with a small state machine that
   * rejects stray continuation bytes, over-long encodings, 5/6-byte
   * sequences, surrogates, code points above U+10FFFF and sequences that are
   * cut off at the end of the input.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string|string[] $str    <p>The string to be checked; for an array every element
   *                                must pass the check.</p>
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, bool $strict = false): bool
  {
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    // from here on we only deal with a string, so that preg_match() and the
    // byte-wise access below see the same value
    $str = (string)$str;

    if ('' === $str) {
      return true;
    }

    if ($strict === true) {
      $isBinary = self::is_binary($str, true);

      if ($isBinary && self::is_utf16($str, false) !== false) {
        return false;
      }

      if ($isBinary && self::is_utf32($str, false) !== false) {
        return false;
      }
    }

    if (self::pcre_utf8_support() === true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (\preg_match('/^.{1}/us', $str) === 1);
    }

    // Fallback without PCRE UTF-8 support: manual validation.

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // lazily load the byte lookup table used to read each octet's value
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Current octet is neither in the US-ASCII range nor a legal first
          // octet of a multi-octet sequence.
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 !== (0xC0 & $in)) {
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
          // Incomplete multi-octet sequence.
          return false;
        }

        // Legal continuation.
        $shift = ($mState - 1) * 6;
        $mUcs4 |= ($in & 0x0000003F) << $shift;

        // End of the multi-octet sequence: mUcs4 now contains the final
        // Unicode code point.
        if (0 === --$mState) {
          // Check for illegal sequences and code points.
          if (
              // From Unicode 3.1, non-shortest form is illegal
              (2 === $mBytes && $mUcs4 < 0x0080)
              ||
              (3 === $mBytes && $mUcs4 < 0x0800)
              ||
              (4 === $mBytes && $mUcs4 < 0x10000)
              ||
              (4 < $mBytes)
              ||
              // From Unicode 3.2, surrogate characters are illegal.
              (($mUcs4 & 0xFFFFF800) === 0xD800)
              ||
              // Code points outside the Unicode range are illegal.
              ($mUcs4 > 0x10FFFF)
          ) {
            return false;
          }

          // initialize UTF8 cache
          $mState = 0;
          $mUcs4 = 0;
          $mBytes = 1;
        }
      }
    }

    // A non-zero state here means the input ended in the middle of a
    // multi-octet sequence, which is not valid UTF-8.
    return $mState === 0;
  }
3591
3592
  /**
   * Decodes a JSON string.
   *
   * The input is first passed through UTF8::filter() and the result is then
   * handed to PHP's native json_decode() together with the remaining
   * arguments.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded.</p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, forwarded unchanged to
   *                        the native function.
   *                        </p>
   *
   * @throws \RuntimeException if the support table reports that ext-json is missing
   *
   * @return mixed
   *                Whatever the native json_decode() returns for the filtered input
   *                (<b>NULL</b> if the <i>json</i> cannot be decoded or if the encoded
   *                data is deeper than the recursion limit).
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filteredJson = self::filter($json);

    // make sure the support table has been populated before reading it
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filteredJson, $assoc, $depth, $options);
  }
3644
3645
  /**
   * Returns the JSON representation of a value.
   *
   * The value is first passed through UTF8::filter() and the result is then
   * handed to PHP's native json_encode() together with the remaining
   * arguments.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of JSON encode options (e.g. <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged to the
   *                       native function.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       </p>
   *
   * @throws \RuntimeException if the support table reports that ext-json is missing
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $filteredValue = self::filter($value);

    // make sure the support table has been populated before reading it
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filteredValue, $options, $depth);
  }
3700
3701
  /**
   * Checks whether JSON is available on the server.
   *
   * Availability is detected by probing for the native json_decode() function.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3711
3712
  /**
   * Makes string's first char lowercase.
   *
   * The string is split into its first character and the remainder via
   * UTF8::substr(); only the first character is passed to UTF8::strtolower(),
   * the remainder is appended untouched.
   *
   * @param string      $str                   <p>The input string</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string The resulting string.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // everything after the first character; substr() may signal failure with false
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);
    if ($remainder === false) {
      $remainder = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $firstCharLower = self::strtolower($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $firstCharLower . $remainder;
  }
3740
3741
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string      $str                   <p>The input string</p>
   * @param string      $encoding              [optional] <p>Charset passed on to lcfirst().</p>
   * @param bool        $cleanUtf8             [optional] <p>Passed on to lcfirst().</p>
   * @param string|null $lang                  [optional] <p>Passed on to lcfirst().</p>
   * @param bool        $tryToKeepStringLength [optional] <p>Passed on to lcfirst().</p>
   *
   * @return string The result of lcfirst().
   */
  public static function lcword(
      string $str,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false,
      string $lang = null,
      bool $tryToKeepStringLength = false
  ): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $result;
  }
3764
3765
  /**
   * Lowercase the first character of all words in the string.
   *
   * The string is split by UTF8::str_to_words(); every non-empty part that
   * is not listed in $exceptions is passed through UTF8::lcfirst(), and the
   * parts are glued together again without a separator.
   *
   * @param string      $str                   <p>The input string.</p>
   * @param string[]    $exceptions            [optional] <p>Exclusion for some words (compared strictly).</p>
   * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not
   *                                           start a new word.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string
   */
  public static function lcwords(
      string $str,
      array $exceptions = [],
      string $charlist = '',
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false,
      string $lang = null,
      bool $tryToKeepStringLength = false
  ): string
  {
    // compare against '' explicitly: a plain truthiness test would also
    // discard the valid input "0"
    if ('' === $str) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only ("0" is a real word and must be kept)
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3825
3826
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string      $str                   <p>The input string</p>
   * @param string      $encoding              [optional] <p>Charset passed on to lcfirst().</p>
   * @param bool        $cleanUtf8             [optional] <p>Passed on to lcfirst().</p>
   * @param string|null $lang                  [optional] <p>Passed on to lcfirst().</p>
   * @param bool        $tryToKeepStringLength [optional] <p>Passed on to lcfirst().</p>
   *
   * @return string The result of lcfirst().
   */
  public static function lowerCaseFirst(
      string $str,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false,
      string $lang = null,
      bool $tryToKeepStringLength = false
  ): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $result;
  }
3849
3850
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without a char list, leading characters from the Unicode categories
   * "Z" (separators) and "C" (control / other) are removed. With a char
   * list, the list is quoted for use inside a regex character class and
   * exactly those leading characters are removed.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped; INF or any empty value
   *                      except the string "0" selects the default set.</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is falsy in PHP but is a perfectly valid char list, so it must not
    // fall into the "no chars given" branch.
    $useDefaultChars = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = "^[\pZ\pC]+";
    } else {
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3874
3875
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is concatenated into one string first; the code
   * points are obtained from UTF8::codepoints() and the largest one is
   * turned back into a character via UTF8::chr().
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character with the highest code point, null when no code points were found.
   */
  public static function max($arg)
  {
    $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $allCodepoints = self::codepoints($haystack, false);
    if (\count($allCodepoints) === 0) {
      return null;
    }

    return self::chr(\max($allCodepoints));
  }
3897
3898
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * The per-character sizes come from UTF8::chr_size_list(); when that list
   * is empty the result is 0.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int Max byte lengths of the given chars.
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
3915
3916
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal
   * encoding is switched to UTF-8.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3932
3933
  /**
   * Checks whether mbstring "overloaded" is active on the server.
   *
   * This is the case when the MB_OVERLOAD_STRING constant exists and its bit
   * is set in the "mbstring.func_overload" INI directive.
   *
   * @return bool
   */
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // without the constant there is nothing to test the directive against
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3950
3951
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array argument is concatenated into one string first; the code
   * points are obtained from UTF8::codepoints() and the smallest one is
   * turned back into a character via UTF8::chr().
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string|null The character with the lowest code point, null when no code points were found.
   */
  public static function min($arg)
  {
    $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $allCodepoints = self::codepoints($haystack, false);
    if (\count($allCodepoints) === 0) {
      return null;
    }

    return self::chr(\min($allCodepoints));
  }
3973
3974
  /**
   * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param mixed $encoding <p>The encoding name to normalize.</p>
   * @param mixed $fallback <p>Passed on to normalize_encoding().</p>
   *
   * @return mixed The result of normalize_encoding().
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3990
3991
  /**
3992
   * Normalize the encoding-"name" input.
3993
   *
3994
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3995
   * @param mixed $fallback <p>e.g.: UTF-8</p>
3996
   *
3997
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
3998
   */
3999 340
  public static function normalize_encoding($encoding, $fallback = '')
4000
  {
4001 340
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];
4002
4003
    // init
4004 340
    $encoding = (string)$encoding;
4005
4006
    if (
4007 340
        !$encoding
4008
        ||
4009 49
        $encoding === '1' // only a fallback, for non "strict_types" usage ...
4010
        ||
4011 340
        $encoding === '0' // only a fallback, for non "strict_types" usage ...
4012
    ) {
4013 296
      return $fallback;
4014
    }
4015
4016
    if (
4017 48
        'UTF-8' === $encoding
4018
        ||
4019 48
        'UTF8' === $encoding
4020
    ) {
4021 21
      return 'UTF-8';
4022
    }
4023
4024
    if (
4025 41
        '8BIT' === $encoding
4026
        ||
4027 41
        'BINARY' === $encoding
4028
    ) {
4029
      return 'CP850';
4030
    }
4031
4032
    if (
4033 41
        'HTML' === $encoding
4034
        ||
4035 41
        'HTML-ENTITIES' === $encoding
4036
    ) {
4037 2
      return 'HTML-ENTITIES';
4038
    }
4039
4040 41
    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
4041 39
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
4042
    }
4043
4044 6
    if (self::$ENCODINGS === null) {
4045 1
      self::$ENCODINGS = self::getData('encodings');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('encodings') can also be of type false. However, the property $ENCODINGS is declared as type array|null. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
4046
    }
4047
4048 6
    if (\in_array($encoding, self::$ENCODINGS, true)) {
0 ignored issues
show
Bug introduced by
It seems like self::ENCODINGS can also be of type false; however, parameter $haystack of in_array() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

4048
    if (\in_array($encoding, /** @scrutinizer ignore-type */ self::$ENCODINGS, true)) {
Loading history...
4049 4
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;
4050
4051 4
      return $encoding;
4052
    }
4053
4054 5
    $encodingOrig = $encoding;
4055 5
    $encoding = \strtoupper($encoding);
4056 5
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);
4057
4058
    $equivalences = [
4059 5
        'ISO8859'     => 'ISO-8859-1',
4060
        'ISO88591'    => 'ISO-8859-1',
4061
        'ISO'         => 'ISO-8859-1',
4062
        'LATIN'       => 'ISO-8859-1',
4063
        'LATIN1'      => 'ISO-8859-1', // Western European
4064
        'ISO88592'    => 'ISO-8859-2',
4065
        'LATIN2'      => 'ISO-8859-2', // Central European
4066
        'ISO88593'    => 'ISO-8859-3',
4067
        'LATIN3'      => 'ISO-8859-3', // Southern European
4068
        'ISO88594'    => 'ISO-8859-4',
4069
        'LATIN4'      => 'ISO-8859-4', // Northern European
4070
        'ISO88595'    => 'ISO-8859-5',
4071
        'ISO88596'    => 'ISO-8859-6', // Greek
4072
        'ISO88597'    => 'ISO-8859-7',
4073
        'ISO88598'    => 'ISO-8859-8', // Hebrew
4074
        'ISO88599'    => 'ISO-8859-9',
4075
        'LATIN5'      => 'ISO-8859-9', // Turkish
4076
        'ISO885911'   => 'ISO-8859-11',
4077
        'TIS620'      => 'ISO-8859-11', // Thai
4078
        'ISO885910'   => 'ISO-8859-10',
4079
        'LATIN6'      => 'ISO-8859-10', // Nordic
4080
        'ISO885913'   => 'ISO-8859-13',
4081
        'LATIN7'      => 'ISO-8859-13', // Baltic
4082
        'ISO885914'   => 'ISO-8859-14',
4083
        'LATIN8'      => 'ISO-8859-14', // Celtic
4084
        'ISO885915'   => 'ISO-8859-15',
4085
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
4086
        'ISO885916'   => 'ISO-8859-16',
4087
        'LATIN10'     => 'ISO-8859-16', // Southeast European
4088
        'CP1250'      => 'WINDOWS-1250',
4089
        'WIN1250'     => 'WINDOWS-1250',
4090
        'WINDOWS1250' => 'WINDOWS-1250',
4091
        'CP1251'      => 'WINDOWS-1251',
4092
        'WIN1251'     => 'WINDOWS-1251',
4093
        'WINDOWS1251' => 'WINDOWS-1251',
4094
        'CP1252'      => 'WINDOWS-1252',
4095
        'WIN1252'     => 'WINDOWS-1252',
4096
        'WINDOWS1252' => 'WINDOWS-1252',
4097
        'CP1253'      => 'WINDOWS-1253',
4098
        'WIN1253'     => 'WINDOWS-1253',
4099
        'WINDOWS1253' => 'WINDOWS-1253',
4100
        'CP1254'      => 'WINDOWS-1254',
4101
        'WIN1254'     => 'WINDOWS-1254',
4102
        'WINDOWS1254' => 'WINDOWS-1254',
4103
        'CP1255'      => 'WINDOWS-1255',
4104
        'WIN1255'     => 'WINDOWS-1255',
4105
        'WINDOWS1255' => 'WINDOWS-1255',
4106
        'CP1256'      => 'WINDOWS-1256',
4107
        'WIN1256'     => 'WINDOWS-1256',
4108
        'WINDOWS1256' => 'WINDOWS-1256',
4109
        'CP1257'      => 'WINDOWS-1257',
4110
        'WIN1257'     => 'WINDOWS-1257',
4111
        'WINDOWS1257' => 'WINDOWS-1257',
4112
        'CP1258'      => 'WINDOWS-1258',
4113
        'WIN1258'     => 'WINDOWS-1258',
4114
        'WINDOWS1258' => 'WINDOWS-1258',
4115
        'UTF16'       => 'UTF-16',
4116
        'UTF32'       => 'UTF-32',
4117
        'UTF8'        => 'UTF-8',
4118
        'UTF'         => 'UTF-8',
4119
        'UTF7'        => 'UTF-7',
4120
        '8BIT'        => 'CP850',
4121
        'BINARY'      => 'CP850',
4122
    ];
4123
4124 5
    if (!empty($equivalences[$encodingUpperHelper])) {
4125 4
      $encoding = $equivalences[$encodingUpperHelper];
4126
    }
4127
4128 5
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;
4129
4130 5
    return $encoding;
4131
  }
4132
4133
  /**
   * Convert "\r\n" and "\r" line endings into the unix-like "\n".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string with "\n" as the only line ending.</p>
   */
  public static function normalize_line_ending(string $str): string
  {
    // "\r\n" must be listed before "\r", otherwise it would end up as two line breaks
    $foreignLineEndings = ["\r\n", "\r"];

    return (string)str_replace($foreignLineEndings, "\n", $str);
  }
4144
4145
  /**
   * Normalize some MS Word special characters.
   *
   * The replacement table is taken from "self::$UTF8_MSWORD" (loaded via
   * "self::getData('utf8_msword')" on first use) and its keys / values are cached
   * in static variables for all further calls.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string <p>The normalized string; the unchanged input if the replacement table is not available.</p>
   */
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      $table = self::$UTF8_MSWORD;
      if ($table === null) {
        $table = self::getData('utf8_msword');
      }

      // "getData()" can also return false: never feed that into "array_keys()" / "array_values()"
      // and never store it in a property that is declared as "array|null"
      if (\is_array($table) === false) {
        return $str;
      }

      self::$UTF8_MSWORD = $table;

      $UTF8_MSWORD_KEYS_CACHE = \array_keys($table);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($table);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
4173
4174
  /**
   * Normalize the whitespace.
   *
   * Every entry of "self::$WHITESPACE_TABLE" is replaced by a simple space (" ") and, unless
   * requested otherwise, every entry of "self::$BIDI_UNI_CODE_CONTROLS_TABLE" is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ('' === $str) {
      return '';
    }

    // one prepared search-list per value of "$keepNonBreakingSpace"
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    $cacheKey = $keepNonBreakingSpace ? 1 : 0;

    if (isset($WHITESPACE_CACHE[$cacheKey]) === false) {
      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
    }

    if (!$keepBidiUnicodeControls) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // the bidi controls are removed first, then the whitespace is unified
      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
4216
4217
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * The result is cached per combination of input string and (normalized) encoding.
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read from the input (e.g. an empty string).
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    // the untouched input is part of the cache-key, the working copy may get re-encoded
    $original = (string)$chr;
    $char = $original;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      if ($encoding !== 'UTF-8') {
        $char = self::encode($encoding, $char);
      }
    }

    $cacheKey = $original . $encoding;
    if (isset($CHAR_CACHE[$cacheKey])) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $codePoint = \IntlChar::ord($char);
      if ($codePoint) {
        return $CHAR_CACHE[$cacheKey] = $codePoint;
      }
    }

    // manual decoding: look at (up to) the first four bytes, "unpack()" numbers them from 1
    $bytes = \unpack('C*', (string)self::substr($char, 0, 4, 'CP850'));
    $leadByte = $bytes ? $bytes[1] : 0;

    if ($leadByte >= 0xF0 && isset($bytes[4])) {
      // 4-byte sequence
      $codePoint = (($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($leadByte >= 0xE0 && isset($bytes[3])) {
      // 3-byte sequence
      $codePoint = (($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($leadByte >= 0xC0 && isset($bytes[2])) {
      // 2-byte sequence
      $codePoint = (($leadByte - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing at all)
      $codePoint = $leadByte;
    }

    return $CHAR_CACHE[$cacheKey] = (int)$codePoint;
  }
4286
4287
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
   *          if the second parameter is not set!
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool
   *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8) {
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // without "mbstring" only the native parser is left
    if (self::$SUPPORT['mbstring'] !== true) {
      /** @noinspection PhpVoidFunctionResultUsedInspection */
      \parse_str($str, $result);

      return !empty($result);
    }

    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
4323
4324
  /**
   * Checks if the "u" pattern modifier is available, which enables Unicode support in PCRE.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // probe with an empty pattern; errors of the probe itself are silenced on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probeResult = @\preg_match('//u', '');

    return (bool)$probeResult;
  }
4336
4337
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved in this order: a string of decimal digits is used as code point,
   * a string of hexadecimal digits is converted via "UTF8::hex_to_int()", anything else is
   * passed to "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException <p>If "ext-ctype" is not available.</p>
   *
   * @return string[] <p>An empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // resolves one boundary into its code point, shared by start and end
    $toCodePoint = static function ($boundary) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit((string)$boundary)) {
        return (int)$boundary;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($boundary)) {
        return (int)self::hex_to_int($boundary);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
4393
4394
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html entities, the loop below decodes them
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapePattern, $str)) {
      $str = (string)\preg_replace($unicodeEscapePattern, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

      // repeat until a pass does not change the string anymore (only in "multi decode" mode)
    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
4442
4443
  /**
   * Filter a list of strings and re-index the remaining values.
   *
   * @param array $strings           <p>The strings to filter.</p>
   * @param bool  $removeEmptyValues <p>Set to true, to drop values that are empty after "trim()".</p>
   * @param int   $removeShortValues [optional] <p>Drop values with a length less than or equal to this
   *                                 number, null disables the check.</p>
   *
   * @return array <p>The remaining values, in their original order.</p>
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $kept = [];
    $checkLength = $removeShortValues !== null;

    foreach ($strings as $value) {
      $isTooShort = $checkLength && self::strlen($value) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues && \trim($value) === '';
      if ($isBlank) {
        continue;
      }

      $kept[] = $value;
    }

    return $kept;
  }
4477
4478
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is always used with the "u" modifier.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
   *
   * @return string <p>An empty string if "preg_replace()" does not return a string.</p>
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    // NOTE(review): "msr" is mapped to "ms", presumably because "r" is no PCRE modifier - confirm
    if ($options === 'msr') {
      $options = 'ms';
    }

    // fallback
    if (!$delimiter) {
      $delimiter = '/';
    }

    $regex = $delimiter . $pattern . $delimiter . 'u' . $options;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4508
4509
  /**
   * Alias of "UTF8::remove_bom()", kept for backward compatibility.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the input.</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4524
4525
  /**
   * Remove every leading BOM that is listed in "self::$BOM" from the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM.</p>
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    // "self::$BOM" maps the BOM byte-string to its length in bytes
    foreach (self::$BOM as $bom => $bomSize) {
      if (self::strpos_in_byte($str, $bom, 0) !== 0) {
        continue;
      }

      $rest = self::substr_in_byte($str, $bomSize, $remainingBytes);
      if ($rest === false) {
        return '';
      }

      $remainingBytes -= $bomSize;
      $str = (string)$rest;
    }

    return $str;
  }
4554
4555
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what is collapsed into a single occurrence,
   * e.g. 'foo   bar' => 'foo bar'.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        $item = (string)$item;

        // an empty needle has no duplicates
        if ($item === '') {
          continue;
        }

        // The needle is returned from a callback instead of being used as replacement string:
        // a replacement string would expand "$0", "\1", ... inside of the needle as back-references.
        $str = (string)\preg_replace_callback(
            '/(?:' . \preg_quote($item, '/') . ')+/',
            static function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
4578
4579
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which
   *                              should not be stripped. Default: '' (strip every tag)
   *                              </p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
  }
4593
4594
  /**
   * Remove all breaks [<br> | <br/> | <br ...> | \r\n | \r | \n] from the string.
   *
   * INFO: "\r\n" is handled as one break, so it is replaced by one $replacement.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is an empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // - "\r\n" comes first, so that it is matched as a whole
    // - only real "<br>" tags are matched (optional attributes, optional self-closing slash),
    //   but no other tag that just starts with "br"
    return (string)\preg_replace('#\r\n|\r|\n|<br(?:\s[^>]*)?/?>#i', $replacement, $str);
  }
4606
4607
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Set to true, to also remove the url encoded form ("%00", "%1F", ...)
   *                            of these characters.</p>
   * @param string $replacement [optional] <p>The string that is inserted for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hexadecimal digits of a percent-encoding are case-insensitive ("%0b" === "%0B")
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat, because a removal can glue a new forbidden sequence together (e.g. "%0%000" => "%00")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4640
4641
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String without the prefix $substring.</p>
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // everything behind the prefix is kept
    $prefixLength = self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4664
4665
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string <p>String having a $str without the suffix $substring.</p>
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    $keepLength = (int)self::strlen($str, $encoding) - (int)self::strlen($substring, $encoding);

    // The lengths are measured in $encoding, so the string has to be cut in $encoding as well
    // (same as in "UTF8::remove_left()").
    return (string)self::substr(
        $str,
        0,
        $keepLength,
        $encoding
    );
  }
4687
4688
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string <p>String after the replacements.</p>
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    // the only difference between both modes is the replace-function that gets called
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4706
4707
  /**
   * Replaces all occurrences of every element of $search in $str by $replacement.
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string <p>String after the replacements.</p>
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    if (!$caseSensitive) {
      return self::str_ireplace($search, $replacement, $str);
    }

    return self::str_replace($search, $replacement, $str);
  }
4725
4726
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // "mb_substitute_character()" accepts a code point or a keyword like "none", but not an
      // arbitrary string: with a replacement, invalid chars are first turned into U+FFFD and the
      // "str_replace()" at the end swaps these for the real replacement.
      $substituteCharacter = $replacementChar === '' ? 'none' : 0xFFFD;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // always fallback via symfony polyfill
      $save = \mb_substitute_character();
      \mb_substitute_character($substituteCharacter);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      // restore the previous setting, because it is global
      \mb_substitute_character($save);

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
    return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4782
4783
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped. If omitted (or empty), every trailing
   *                      separator (\pZ) and "other" (\pC, e.g. control) character is stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "\z" is used instead of "$", because "$" also matches in front of a final line break,
    // so that e.g. rtrim("abc\n", 'c') would strip the "c".

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      // the string "0" is a valid char-list, every other empty value selects the default
      $pattern = '[\pZ\pC]+\z';
    } else {
      $pattern = '[' . \preg_quote((string)$chars, '/') . ']+\z';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4807
4808
  /**
   * Build a regex fragment that matches the elements of $s, the result is cached.
   *
   * The elements come from "UTF8::str_split()": "-" is put at the very beginning of the character
   * class, elements of at most two bytes are added via "preg_quote()", longer elements with a
   * length of 1 are added as they are and everything else becomes an alternative of its own.
   *
   * @param string $s     <p>The chars to match.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string <p>"[...]" or, if there are alternatives, "(?:[...]|...)".</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 collects the character class, all other entries are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $element) {
      if ('-' === $element) {
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($element[2])) {
        $parts[0] .= \preg_quote($element, '/');
      } elseif (1 === self::strlen($element)) {
        $parts[0] .= $element;
      } else {
        $parts[] = $element;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = 1 === \count($parts)
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4856
4857
  /**
   * WARNING: Prints the detected native UTF-8 support (every entry of "self::$SUPPORT"), e.g. for debugging.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // collect the report first, then print it in one go
    $report = '<pre>';
    foreach (self::$SUPPORT as $key => $value) {
      $report .= $key . ' - ' . \print_r($value, true) . "\n<br>";
    }
    $report .= '</pre>';

    echo $report;
  }
4872
4873
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars [optional] <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity; an empty string for empty input.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // "UTF-8" and "CP850" are passed on as-is, anything else is normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4902
4903
  /**
   * Replaces every run of $tabLength consecutive spaces with one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
   *
   * @return string The converted string; $str is returned unchanged if $tabLength is smaller than 1.
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    // str_repeat() rejects a negative multiplier, and a multiplier of zero
    // produces an empty search string that cannot match anything.
    if ($tabLength < 1) {
      return $str;
    }

    return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
  }
4913
4914
  /**
   * Convert a string to an array of Unicode characters.
   *
   * If an array is passed, every element is split on its own (with the same
   * $length and $cleanUtf8) and the array of results is returned.
   *
   * @param string|int|string[]|int[] $str       <p>The string to split into array.</p>
   * @param int                       $length    [optional] <p>Max character length of each array element.</p>
   * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[]|string[][] An array containing chunks of the string
   *                             (one such array per element for array input);
   *                             an empty array for empty input or $length <= 0.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($length <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        // forward $cleanUtf8 too, so array elements are treated like scalar input
        $str[$k] = self::split($v, $length, $cleanUtf8);
      }

      return $str;
    }

    // init
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // one match per character ("s": the dot also matches newlines)
      \preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: walk the bytes and group them by the length announced in
      // the lead byte; a sequence with a wrong continuation byte is not
      // added to the result

      $len = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // 0xxxxxxx: single byte
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 110xxxxx: two-byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 1110xxxx: three-byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 11110xxx: four-byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue the single characters back together in groups of $length
      $ret = \array_chunk($ret, $length);

      return \array_map(
          function ($item) {
            return \implode('', $item);
          }, $ret
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
5046
5047
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string      $str                   <p>The input string.</p>
   * @param string      $encoding              [optional] <p>Default: UTF-8</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // upper-casing with the caller's settings, shared by both passes below
    $toUpper = function ($part) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength) {
      return UTF8::strtoupper($part, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    };

    $camel = self::lcfirst(self::trim($str), $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    // drop leading dashes / underscores
    $camel = (string)\preg_replace('/^[-_]+/', '', $camel);

    // remove separators and upper-case the character that follows them
    $camel = (string)\preg_replace_callback(
        '/[-_\s]+(.)?/u',
        function ($match) use ($toUpper) {
          return isset($match[1]) ? $toUpper($match[1]) : '';
        },
        $camel
    );

    // upper-case the character that follows a run of digits
    return (string)\preg_replace_callback(
        '/[\d]+(.)?/u',
        function ($match) use ($toUpper) {
          return $toUpper($match[0]);
        },
        $camel
    );
  }
5087
5088
  /**
   * Returns the string with the first letter of each word capitalized,
   * except for when the word is a name which shouldn't be capitalized.
   *
   * Whitespace is collapsed first; afterwards the parts separated by a
   * space and then the parts separated by "-" are processed.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $collapsed = self::collapse_whitespace($str);
    $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($bySpace, '-');
  }
5105
5106
  /**
   * Capitalizes the parts of a personal name that are separated by $delimiter,
   * e.g. "marcus aurelius" -> "Marcus Aurelius".
   *
   * A part is left untouched if it equals one of the listed name particles,
   * if it starts with one of the listed prefixes, or (only for the "-"
   * delimiter) if it starts with one of the listed name particles.
   *
   * @param string $names     <p>The name(s) to process.</p>
   * @param string $delimiter <p>The separator between the parts.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The processed name(s); an empty string if $delimiter is empty.
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // explode() cannot split on an empty delimiter
    if ('' === $delimiter) {
      return '';
    }

    $namesArray = \explode($delimiter, $names);

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // write back by key instead of iterating by reference, so that no
    // reference to the last element is left behind
    foreach ($namesArray as $index => $name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      $keepAsIs = false;

      if ('-' === $delimiter) {
        foreach ($specialCases['names'] as $beginning) {
          if (self::strpos($name, $beginning, 0, $encoding) === 0) {
            $keepAsIs = true;

            break;
          }
        }
      }

      if ($keepAsIs === false) {
        foreach ($specialCases['prefixes'] as $beginning) {
          if (self::strpos($name, $beginning, 0, $encoding) === 0) {
            $keepAsIs = true;

            break;
          }
        }
      }

      if ($keepAsIs) {
        continue;
      }

      $namesArray[$index] = self::str_upper_first($name);
    }

    return \implode($delimiter, $namesArray);
  }
5194
5195
  /**
   * Returns true if the string contains $needle, false otherwise. By default
   * the comparison is case-sensitive, but can be made insensitive by setting
   * $caseSensitive to false.
   *
   * An empty $haystack or an empty $needle always yields false.
   *
   * @param string      $haystack      <p>The input string.</p>
   * @param string      $needle        <p>Substring to look for.</p>
   * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                                   A non-boolean value is used as $encoding (backward compatibility).</p>
   * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // backward-compatibility fallback: a non-boolean third argument is taken as the encoding
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
5224
5225
  /**
   * Returns true if the string contains all $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * An empty $haystack or an empty $needles array always yields false.
   *
   * @param string      $haystack      <p>The input string.</p>
   * @param array       $needles       <p>SubStrings to look for.</p>
   * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                                   A non-boolean value is used as $encoding (backward compatibility).</p>
   * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains every one of the $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ('' === $haystack || [] === $needles) {
      return false;
    }

    // backward-compatibility fallback: a non-boolean third argument is taken as the encoding
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
      if ($found === false) {
        return false;
      }
    }

    return true;
  }
5260
5261
  /**
   * Returns true if the string contains any $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * @param string      $haystack      <p>The input string.</p>
   * @param array       $needles       <p>SubStrings to look for.</p>
   * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                                   The value is passed on to str_contains() unchanged.</p>
   * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains at least one of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ([] === $needles) {
      return false;
    }

    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
      if ($found) {
        return true;
      }
    }

    return false;
  }
5288
5289
  /**
   * Returns a lowercase and trimmed string separated by dashes. Dashes are
   * inserted before uppercase characters (with the exception of the first
   * character of the string), and in place of spaces as well as underscores.
   *
   * Shortcut for str_delimit() with "-" as delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
5303
5304
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * The delimiter is always inserted literally, i.e. sequences like "$1" or
   * "\0" inside of it are not treated as regex back-references.
   *
   * @param string      $str                           <p>The input string.</p>
   * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
   * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8                     [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
   *                                                   tr</p>
   * @param bool        $tryToKeepStringLength         [optional] <p>true === try to keep the string length: e.g. ẞ ->
   *                                                   ß</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    $str = self::trim($str);

    // mark every "A"-"Z" that is not at a word boundary with a leading dash
    $str = (string)\preg_replace('/\B([A-Z])/u', '-\1', $str);

    $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    // Insert the delimiter through a callback: as a plain preg_replace()
    // replacement, "$n" / "\n" sequences in $delimiter would be expanded
    // as back-references instead of being inserted as-is.
    return (string)\preg_replace_callback(
        '/[-_\s]+/u',
        static function () use ($delimiter) {
          return $delimiter;
        },
        $str
    );
  }
5331
5332
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
   * "mb_detect_encoding()" (if mbstring is available) and finally a round-trip
   * through "iconv()" for every known encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      $isUtf16 = self::is_utf16($str, false);
      if ($isUtf16 === 1) {
        return 'UTF-16LE';
      }
      if ($isUtf16 === 2) {
        return 'UTF-16BE';
      }

      $isUtf32 = self::is_utf32($str, false);
      if ($isUtf32 === 1) {
        return 'UTF-32LE';
      }
      if ($isUtf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $encoding = \mb_detect_encoding($str, $detectOrder, true);
      if ($encoding) {
        return $encoding;
      }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // getData() can also return false; keep the property an array in that
      // case, so the loop below never iterates over a non-array value
      $encodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodings) ? $encodings : [];
    }

    foreach (self::$ENCODINGS as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5458
5459
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is case-sensitive; an empty $haystack or an empty
   * $needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    $needleLength = \strlen($needle);

    if (0 === $needleLength || '' === $haystack) {
      return false;
    }

    // compare the last $needleLength units of the haystack with the needle
    $tail = \substr($haystack, -$needleLength);

    return $tail === $needle;
  }
5475
5476
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if ([] === $substrings) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5500
5501
  /**
   * Ensures that the string begins with $substring. If it doesn't, it's
   * prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    return self::str_starts_with($str, $substring)
        ? $str
        : $substring . $str;
  }
5518
5519
  /**
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    return self::str_ends_with($str, $substring)
        ? $str
        : $str . $substring;
  }
5535
5536
  /**
   * Capitalizes the first word of the string, replaces underscores with
   * spaces, and strips '_id'.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // search => replacement; '_id' is handled before the single underscore
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $str = self::str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    $trimmed = self::trim($str);

    return self::ucfirst($trimmed);
  }
5560
5561
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty $haystack or an empty $needle always yields false.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // NOTE(review): the tail is cut with the native substr() / strlen(), i.e.
    // by the needle's byte length - presumably fine as long as both case
    // variants have the same byte length; confirm for characters where the
    // upper- and lowercase forms differ in size.
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5581
5582
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if ([] === $substrings) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5606
5607
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * Thin wrapper around stripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
  }
5629
5630
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * Thin wrapper around strripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
  }
5653
5654
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * Thin wrapper around strpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
  }
5676
5677
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * Thin wrapper around strrpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
  }
5700
5701
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * The string is returned unchanged if $index is greater than its length
   * or if its length cannot be determined.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    // strlen() may return false; without a length there is no safe insert position
    if ($len === false) {
      return $str;
    }

    if ($index > $len) {
      return $str;
    }

    // substr() may return false as well, which is treated as an empty part
    $start = (string)self::substr($str, 0, $index, $encoding);
    $end = (string)self::substr($str, $index, $len, $encoding);

    return $start . $substring . $end;
  }
5724
5725
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search values are turned into quoted, case-insensitive UTF-8 regex
   * patterns; an empty search value never matches. The replacement values
   * are inserted literally, i.e. "$1" or "\1" are not treated as regex
   * back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value(s) to search for. Every replacement with
   *                       search array is performed on the result of previous
   *                       replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      // force a string, as the search value may be a number
      $needle .= '';

      $patterns[$key] = '' === $needle
          ? '/^(?<=.)$/' // can never match
          : '/' . \preg_quote($needle, '/') . '/ui';
    }

    // preg_replace() expands "$n" / "\n" in the replacement, so escape
    // "$" and "\" to get the literal semantics of str_replace()
    $escape = static function ($replacement) {
      return \addcslashes((string)$replacement, '\\$');
    };
    $replacements = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $replaced = 0;
    $subject = \preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
5768
5769
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle never counts as a match.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    $bothGiven = ($haystack !== '' && $needle !== '');

    // a match only counts if it is found at the very first position
    return $bothGiven && 0 === self::stripos($haystack, $needle);
  }
5789
5790
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    // nothing to compare against
    if ('' === $str || [] === $substrings) {
      return false;
    }

    $found = false;
    foreach ($substrings as $candidate) {
      if (self::str_istarts_with($str, $candidate)) {
        $found = true;

        break;
      }
    }

    return $found;
  }
5818
5819
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * The separator is located via "str_iindex_first()". An empty string is
   * returned if $str or $separator is empty or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    $position = self::str_iindex_first($str, $separator);
    if (false === $position) {
      return '';
    }

    // continue right behind the separator
    $startAt = $position + self::strlen($separator, $encoding);

    return (string)self::substr($str, $startAt, null, $encoding);
  }
5850
5851
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * The separator is located via "str_iindex_last()". An empty string is
   * returned if $str or $separator is empty or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    $position = self::str_iindex_last($str, $separator);
    if (false === $position) {
      return '';
    }

    // continue right behind the separator
    $startAt = $position + self::strlen($separator, $encoding);

    return (string)self::substr($str, $startAt, null, $encoding);
  }
5882
5883
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * The separator is located via "str_iindex_first()". An empty string is
   * returned if $str or $separator is empty or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    $position = self::str_iindex_first($str, $separator);

    return false === $position
        ? ''
        : (string)self::substr($str, 0, $position, $encoding);
  }
5909
5910
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * The separator is located via "str_iindex_last()". An empty string is
   * returned if $str or $separator is empty or if the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    $position = self::str_iindex_last($str, $separator);

    return false === $position
        ? ''
        : (string)self::substr($str, 0, $position, $encoding);
  }
5936
5937
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * Thin wrapper around "stristr()" which returns an empty string instead of false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
5969
5970
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   *
   * Thin wrapper around "strrichr()" which returns an empty string instead of false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
5997
5998
  /**
   * Returns the last $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end. Values <= 0 yield an empty string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n <= 0) {
      return '';
    }

    // a negative start offset counts from the end of the string
    $tail = self::substr($str, -$n, null, $encoding);
    if (false === $tail) {
      return '';
    }

    return $tail;
  }
6017
6018
  /**
   * Limit the number of characters in a string.
   *
   * If the string is longer than $length it is cut so that the result,
   * including $strAddOn, is $length characters long. If $strAddOn itself is
   * longer than $length, only $strAddOn is returned.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    // short enough, nothing to cut
    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Reserve room for the add-on, measured in the same encoding as $str.
    // Never go below zero: a negative length would make "substr()" cut
    // characters from the end instead of limiting the string.
    $keep = \max(0, $length - (int)self::strlen($strAddOn, $encoding));

    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
6044
6045
  /**
   * Limit the number of characters in a string, cutting at a word boundary (space) where possible.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    $lastIndex = $length - 1;

    // the limit falls exactly on a space: cut right before it
    if (' ' === self::substr($str, $lastIndex, 1, $encoding)) {
      return self::substr($str, 0, $lastIndex, $encoding) . $strAddOn;
    }

    // otherwise drop the (possibly incomplete) last word of the truncated string
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ('' !== $withoutLastWord) {
      return $withoutLastWord . $strAddOn;
    }

    // nothing is left after dropping the last word: fall back to a hard cut
    return self::substr($truncated, 0, $lastIndex, $encoding) . $strAddOn;
  }
6086
6087
  /**
   * Returns the longest common prefix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // the prefix can not be longer than the shorter of both strings
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $prefix = '';
    $pos = 0;
    while ($pos < $limit) {
      $current = self::substr($str, $pos, 1, $encoding);

      // stop at the first position where both strings differ
      if ($current != self::substr($otherStr, $pos, 1, $encoding)) {
        break;
      }

      $prefix .= $current;
      ++$pos;
    }

    return $prefix;
  }
6113
6114
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The longest common substring, or an empty string if there is none.
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = self::strlen($str, $encoding);
    $otherLength = self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength == 0 || $otherLength == 0) {
      return '';
    }

    // split $otherStr into characters once, instead of once per table cell
    $otherChars = [];
    for ($j = 1; $j <= $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
    }

    $len = 0; // length of the longest common substring found so far
    $end = 0; // position in $str right behind that substring

    // $table[$i][$j] = length of the common substring ending at $str[$i - 1] and $otherStr[$j - 1]
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; $i++) {
      // the character of $str does not depend on $j
      $strChar = self::substr($str, $i - 1, 1, $encoding);

      for ($j = 1; $j <= $otherLength; $j++) {
        // cells of non-matching characters keep their initial value 0
        if ($strChar == $otherChars[$j]) {
          $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

          // strict ">" keeps the first occurrence in case of ties
          if ($table[$i][$j] > $len) {
            $len = $table[$i][$j];
            $end = $i;
          }
        }
      }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
6165
6166
  /**
   * Returns the longest common suffix between the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // the suffix can not be longer than the shorter of both strings
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $suffix = '';
    $fromEnd = 1;
    while ($fromEnd <= $limit) {
      // negative offsets walk both strings backwards from their last character
      $current = self::substr($str, -$fromEnd, 1, $encoding);

      if ($current != self::substr($otherStr, -$fromEnd, 1, $encoding)) {
        break;
      }

      $suffix = $current . $suffix;
      ++$fromEnd;
    }

    return $suffix;
  }
6192
6193
  /**
   * Returns true if $str matches the supplied pattern, false otherwise.
   *
   * The pattern is wrapped in "/" delimiters and used with the "u" modifier.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern (without delimiters) to match against.</p>
   *
   * @return bool Whether or not $str matches the pattern.
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    $regex = '/' . $pattern . '/u';

    // "preg_match()" yields 1 on a match, 0 or false otherwise
    return (bool)\preg_match($regex, $str);
  }
6209
6210
  /**
   * Returns whether or not a character exists at an index. Offsets may be
   * negative to count from the last character in the string. Implements
   * part of the ArrayAccess interface.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not the index exists.
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $length = self::strlen($str, $encoding);

    // positive offsets are zero-based, negative offsets start at -1 for the last character
    return $offset >= 0
        ? $length > $offset
        : $length >= \abs($offset);
  }
6233
6234
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
   * does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // measure the string in the same encoding that is used to read the character
    $length = (int)self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
6263
6264
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if $pad_length is not larger than the
   * length of $str or if $pad_string is empty. An empty $str always yields
   * an empty string.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                           or one of the strings 'left', 'right' or 'both'.
   *                           </p>
   * @param string $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string Returns the padded string.
   *
   * @throws \InvalidArgumentException If $pad_type is neither an integer nor one of the known aliases.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // translate the textual aliases into the STR_PAD_* constants
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = (int)self::strlen($str, $encoding);

    // the string already has (at least) the requested length
    if ($pad_length <= 0 || $pad_length <= $str_length) {
      return $str;
    }

    // an empty pad string can not fill anything and would lead to a division by zero below
    $ps_length = (int)self::strlen($pad_string, $encoding);
    if ($ps_length === 0) {
      return $str;
    }

    // number of characters that have to be added
    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff, $encoding);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // with an odd $diff the extra character goes to the right side
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff, $encoding);
        $pre = '';
    }

    return $pre . $str . $post;
  }
6335
6336
  /**
   * Returns a new string of a given length such that both sides of the
   * string are padded. Alias for pad() with a $padType of 'both'.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    // with an odd number of missing characters the right side gets one more
    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
6353
6354
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded. Alias for pad() with a $padType of 'left'.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // measure $str in the requested encoding, as "str_pad_both()" does
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
6369
6370
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded. Alias for pad() with a $padType of 'right'.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // measure $str in the requested encoding, as "str_pad_both()" does
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
6385
6386
  /**
   * Repeat a string.
   *
   * The input is passed through "filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
6410
6411
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // plain delegation, $count is filled by the native function
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
6444
6445
  /**
   * Replaces $search at the beginning of the string with $replacement.
   *
   * The search term is quoted and anchored with "^" before it is handed
   * to "regex_replace()".
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    $pattern = '^' . \preg_quote($search, '/');

    // double every backslash of the replacement (presumably so that it is taken literally by "regex_replace()")
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $pattern, $escapedReplacement);
  }
6462
6463
  /**
   * Replaces $search at the ending of the string with $replacement.
   *
   * The search term is quoted and anchored with "$" before it is handed
   * to "regex_replace()".
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    $pattern = \preg_quote($search, '/') . '$';

    // double every backslash of the replacement (presumably so that it is taken literally by "regex_replace()")
    $escapedReplacement = self::str_replace('\\', '\\\\', $replacement);

    return self::regex_replace($str, $pattern, $escapedReplacement);
  }
6480
6481
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on; returned unchanged if $search is not found.</p>
   *
   * @return string
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $position = self::strpos($subject, $search);

    if (false === $position) {
      return $subject;
    }

    // swap exactly the characters covered by the first match
    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6499
6500
  /**
   * Replace the last "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on; returned unchanged if $search is not found.</p>
   *
   * @return string
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $position = self::strrpos($subject, $search);

    if (false === $position) {
      return $subject;
    }

    // swap exactly the characters covered by the last match
    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6518
6519
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string.
   */
  public static function str_shuffle(string $str): string
  {
    // shuffle the character positions, then rebuild the string in that order
    $lastIndex = self::strlen($str) - 1;
    $positions = \range(0, $lastIndex);

    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($positions);

    $result = '';
    foreach ($positions as $position) {
      $result .= self::substr($str, $position, 1);
    }

    return $result;
  }
6541
6542
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * @param string $str
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring (an empty string if a non-negative $end is not
   *                     greater than $start), or whatever "UTF8::substr()" reports for the computed range.</p>
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    if ($end === null) {
      // no end given -> take everything up to the end of the string
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      // the end lies at or before the start -> empty slice
      return '';
    } elseif ($end < 0) {
      // negative end -> count from the end of the string
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    // the length is measured in the same encoding that is used for the extraction
    return self::substr($str, $start, $length, $encoding);
  }
6571
6572
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Dashes and whitespace become underscores, matched digits are wrapped in
   * underscores, matched upper-case letters are lower-cased and prefixed with an
   * underscore; repeated and surrounding underscores are removed at the end.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $prepared = \str_replace('-', '_', self::normalize_whitespace($str));

    $marked = (string)\preg_replace_callback(
        '/([\d|A-Z])/u',
        function ($hit) use ($encoding) {
          $char = $hit[1];

          // a character that survives the int round-trip is a digit -> wrap it in underscores
          if ((string)(int)$char == $char) {
            return '_' . $char . '_';
          }

          // everything else gets lower-cased and an underscore in front
          return '_' . UTF8::strtolower($char, $encoding);
        },
        $prepared
    );

    $patterns = [
        '/\s+/',        // convert spaces to "_"
        '/^\s+|\s+$/',  // trim leading & trailing spaces
        '/_+/',         // remove double "_"
    ];
    $replacements = [
        '_',
        '',
        '_',
    ];
    $cleaned = (string)\preg_replace($patterns, $replacements, $marked);

    // trim leading & trailing "_" first, then leading & trailing whitespace
    return self::trim(self::trim($cleaned, '_'));
  }
6619
6620
  /**
   * Sort all characters of a string by their code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    // flipping twice collapses duplicate code points
    $codePoints = $unique ? \array_flip(\array_flip($codePoints)) : $codePoints;

    // both sort functions keep the keys and order the values in place
    $desc ? \arsort($codePoints) : \asort($codePoints);

    return self::string($codePoints);
  }
6645
6646
  /**
   * alias for "UTF8::split()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::split()
   *
   * @param string|string[] $str
   * @param int             $len
   *
   * @return string[]
   */
  public static function str_split($str, int $len = 1): array
  {
    $chunks = self::split($str, $len);

    return $chunks;
  }
6660
6661
  /**
   * Splits the string at every occurrence of "$pattern" and returns the pieces
   * as an array of strings. The pattern is quoted before it is used, so it is
   * matched literally. A positive $limit truncates the result to that many
   * pieces (the unsplit remainder is dropped).
   *
   * @param string $str
   * @param string $pattern <p>The delimiter with which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] An array of strings.
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    // a limit of zero asks for no results at all
    if ($limit === 0) {
      return [];
    }

    // an empty pattern cannot split anything
    if ($pattern === '') {
      return [$str];
    }

    // preg_split() puts the unsplit remainder into the last slot when a limit is given,
    // so one extra slot is requested and discarded afterwards; negative limits mean "no limit"
    $splitLimit = $limit > 0 ? $limit + 1 : -1;

    $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $splitLimit);
    if ($pieces === false) {
      return [];
    }

    // all slots used -> the last one holds the remainder and is not part of the result
    if ($splitLimit > 0 && \count($pieces) === $splitLimit) {
      \array_pop($pieces);
    }

    return $pieces;
  }
6704
6705
  /**
   * Check if the string starts with the given substring.
   *
   * The comparison is byte-wise and case-sensitive. An empty haystack or an
   * empty needle never counts as a match.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // compare only the leading bytes instead of searching the whole haystack for the needle
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6725
6726
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // without input or without candidates there is nothing that could match
    if ('' === $str || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6754
6755
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * An empty string is returned if the input or the separator is empty, or
   * if the separator does not occur in the input.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // continue right behind the separator and take the rest of the string
    $startPos = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $startPos, null, $encoding);
  }
6786
6787
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * An empty string is returned if the input or the separator is empty, or
   * if the separator does not occur in the input.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // continue right behind the separator and take the rest of the string
    $startPos = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $startPos, null, $encoding);
  }
6818
6819
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * An empty string is returned if the input or the separator is empty, or
   * if the separator does not occur in the input.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // everything from the beginning up to (not including) the separator
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6850
6851
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * An empty string is returned if the input or the separator is empty, or
   * if the separator does not occur in the input.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $separator === '') {
      return '';
    }

    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // everything from the beginning up to (not including) the last separator
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6882
6883
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * Delegates to "UTF8::strstr()"; an empty string is returned if the input or
   * the needle is empty, or if the lookup yields false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $needle || '' === $str) {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
6915
6916
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   *
   * Delegates to "UTF8::strrchr()"; an empty string is returned if the input or
   * the needle is empty, or if the lookup yields false.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $needle || '' === $str) {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    return false === $found ? '' : $found;
  }
6943
6944
  /**
   * Surrounds $str with the given substring.
   *
   * @param string $str
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string String with the substring both prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
6956
6957
  /**
   * Returns a string with the first letter of each word capitalized and the
   * rest of the word lower-cased. Also accepts an array, $ignore, allowing you
   * to list words not to be touched.
   *
   * @param string              $str
   * @param string[]|array|null $ignore                [optional] <p>An array of words not to capitalize or null.
   *                                                   Words are compared strictly, before any case change.
   *                                                   Default: null</p>
   * @param string              $encoding              [optional] <p>Default: UTF-8</p>
   * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
   *                                                   tr</p>
   * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
   *                                                   ß</p>
   * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(
      string $str,
      array $ignore = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false,
      string $lang = null,
      bool $tryToKeepStringLength = false,
      bool $useTrimFirst = true
  ): string
  {
    if ($useTrimFirst === true) {
      $str = self::trim($str);
    }

    $parts = self::str_to_words($str);

    foreach ($parts as $index => $part) {

      // ignored words are kept exactly as they are
      if ($ignore && \in_array($part, $ignore, true)) {
        continue;
      }

      $lowered = self::strtolower(
          $part,
          $encoding,
          $cleanUtf8,
          $lang,
          $tryToKeepStringLength
      );

      $parts[$index] = self::str_upper_first(
          $lowered,
          $encoding,
          $cleanUtf8,
          $lang,
          $tryToKeepStringLength
      );
    }

    // the parts still contain the text between the words, so glue them without a separator
    return \implode('', $parts);
  }
7014
7015
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize. The entries are merged verbatim into
   *                         the "small words" alternation of the regular expressions below.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    // NOTE(review): the entries (including "$ignore") are regex fragments and are not quoted here.
    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // a title without any lower-case character (e.g. all caps) is lower-cased first,
    // using the same encoding as every other case conversion in this method
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= static::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind below contains an ellipsis character although its inline
    // comment talks about a hyphen - confirm which one is intended.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
7159
7160
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in
   * base 2 without leading zeros ("0" for an empty or all-zero input).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string consisting of "0" and "1" characters.</p>
   */
  public static function str_to_binary(string $str): string
  {
    if ($str === '') {
      return '0';
    }

    $bytes = \unpack('C*', $str);
    if ($bytes === false) {
      return '0';
    }

    // Convert byte by byte: pushing the whole hex dump through "base_convert()"
    // loses precision as soon as the value no longer fits into a float.
    $bits = '';
    foreach ($bytes as $byte) {
      $bits .= \sprintf('%08b', $byte);
    }

    // same shape as a plain base conversion: no leading zeros, but at least one digit
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
7173
7174
  /**
   * Split a string into its lines.
   *
   * "\r\n", "\r" and "\n" each count as exactly one line break, so blank lines
   * are kept as empty strings unless they are filtered out via the options.
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    // Match one line break at a time ("\r\n" first); a quantified character class
    // would swallow two consecutive breaks and silently drop the blank line between them.
    $return = \preg_split("/\r\n|\r|\n/u", $str);

    if ($return === false) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    // no filtering requested -> return the raw lines
    if (
        $removeShortValues === null
        &&
        $removeEmptyValues === false
    ) {
      return $return;
    }

    return self::reduce_string_array(
        $return,
        $removeEmptyValues,
        $removeShortValues
    );
  }
7209
7210
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the delimiters captured, so the unfiltered result
   * contains the words as well as the text between them.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    $fallback = ($removeEmptyValues === true ? [] : ['']);

    if ('' === $str) {
      return $fallback;
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      return $fallback;
    }

    // no filtering requested -> return the raw parts
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $reduced = self::reduce_string_array(
        $parts,
        $removeEmptyValues,
        $removeShortValues
    );

    // make sure that every remaining entry is a string (keys are kept)
    return \array_map(
        function ($entry) {
          return (string)$entry;
        },
        $reduced
    );
  }
7254
7255
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
7270
7271
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length. A substring that is
   * longer than the desired length cannot fit and is therefore not appended.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // short enough -> nothing to truncate
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    if ($substring !== '') {
      $substringLength = (int)self::strlen($substring, $encoding);

      if ($length >= 0 && $substringLength > $length) {
        // The substring does not fit: subtracting it would make the length negative,
        // which cuts from the end of the string and yields a result longer than requested.
        $substring = '';
      } else {
        // Need to further trim the string so we can append the substring
        $length -= $substringLength;
      }
    }

    // a failed extraction (false) is treated like an empty string
    return (string)self::substr($str, 0, $length, $encoding) . $substring;
  }
7304
7305
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length. A substring that is longer than the desired
   * length cannot fit and is therefore not appended.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // short enough -> nothing to truncate
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    if ($substring !== '') {
      $substringLength = (int)self::strlen($substring, $encoding);

      if ($length >= 0 && $substringLength > $length) {
        // The substring does not fit: subtracting it would make the length negative,
        // which cuts from the end of the string and yields a result longer than requested.
        $substring = '';
      } else {
        // need to further trim the string so we can append the substring
        $length -= $substringLength;
      }
    }

    $truncated = self::substr($str, 0, $length, $encoding);
    if ($truncated === false) {
      return '';
    }

    // if the last word was truncated (i.e. the cut is not directly followed by a space)
    $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
    if ($strPosSpace !== $length) {
      // find pos of the last occurrence of a space, get up to that
      $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

      if ($lastPos !== false || $strPosSpace !== false) {
        $truncated = self::substr($truncated, 0, (int)$lastPos, $encoding);
      }
    }

    // a failed extraction (false) is treated like an empty string
    return (string)$truncated . $substring;
  }
7348
7349
  /**
   * Returns the string delimited by underscores.
   *
   * This is "UTF8::str_delimit()" with "_" as the delimiter.
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
7363
7364
  /**
   * Returns an UpperCamelCase version of the supplied string: the string is
   * camelized via "UTF8::str_camelize()" and its first character is then
   * upper-cased via "UTF8::str_upper_first()".
   *
   * @param string      $str                   <p>The input string.</p>
   * @param string      $encoding              [optional] <p>Default: UTF-8</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // only the encoding is handed to the camelize step; the other options apply to the first character
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first(
        $camelized,
        $encoding,
        $cleanUtf8,
        $lang,
        $tryToKeepStringLength
    );
  }
7381
7382
  /**
   * Alias for "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string      $str                   <p>The input string.</p>
   * @param string      $encoding              [optional] <p>Forwarded to UTF8::ucfirst().</p>
   * @param bool        $cleanUtf8             [optional] <p>Forwarded to UTF8::ucfirst().</p>
   * @param string|null $lang                  [optional] <p>Forwarded to UTF8::ucfirst().</p>
   * @param bool        $tryToKeepStringLength [optional] <p>Forwarded to UTF8::ucfirst().</p>
   *
   * @return string <p>The result of UTF8::ucfirst().</p>
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $result;
  }
7400
7401
  /**
   * Counts the words of a UTF-8 string or returns them as an array.
   *
   * The string is split via UTF8::str_to_words(); this method reads the entries
   * at the odd indexes of that result as words and uses the length of the other
   * entries only to compute the word offsets.
   *
   * @see UTF8::str_to_words()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return the number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with the word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return string[]|int <p>The number of words, or the words themselves (depending on $format).</p>
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $chunks = self::str_to_words($str, $charlist);
    $chunkCount = \count($chunks);

    // format 1: plain list of words
    if ($format === 1) {
      $words = [];
      $index = 1;
      while ($index < $chunkCount) {
        $words[] = $chunks[$index];
        $index += 2;
      }

      return $words;
    }

    // format 2: words keyed by their character offset
    if ($format === 2) {
      $wordsByOffset = [];
      $position = self::strlen($chunks[0]);
      $index = 1;
      while ($index < $chunkCount) {
        $wordsByOffset[$position] = $chunks[$index];
        // advance by the word itself plus the chunk that follows it
        $position += self::strlen($chunks[$index]) + self::strlen($chunks[$index + 1]);
        $index += 2;
      }

      return $wordsByOffset;
    }

    // every other format: number of words only
    return (int)(($chunkCount - 1) / 2);
  }
7444
7445
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are passed
   * through UTF8::strtocasefold() before they are compared.
   *
   * @see UTF8::strcmp()
   * @see UTF8::strtocasefold()
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
  }
7466
7467
  /**
   * Alias for "UTF8::strstr()": all arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack      <p>Forwarded to UTF8::strstr().</p>
   * @param string $needle        <p>Forwarded to UTF8::strstr().</p>
   * @param bool   $before_needle [optional] <p>Forwarded to UTF8::strstr().</p>
   * @param string $encoding      [optional] <p>Forwarded to UTF8::strstr().</p>
   * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::strstr().</p>
   *
   * @return string|false <p>The result of UTF8::strstr().</p>
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7484
7485
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings are reported as equal right away. Otherwise both strings
   * are normalized to NFD via \Normalizer and compared with the native strcmp().
   * If a string can not be normalized (e.g. because it contains invalid UTF-8),
   * its raw bytes are compared instead, so that different broken strings are
   * never reported as equal.
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() does not return a string on failure -> fall back to the raw input
    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
  }
7504
7505
  /**
   * Find length of initial segment not matching mask.
   *
   * Returns the number of characters at the start of the (optionally cut) string
   * that are not contained in $charList. When no character of $charList occurs,
   * the length of the whole (cut) string is returned.
   *
   * @param string   $str      <p>The input string.</p>
   * @param string   $charList <p>The characters that end the initial segment.</p>
   * @param int      $offset   [optional] <p>Start position, passed to UTF8::substr().</p>
   * @param int|null $length   [optional] <p>Length of the part to examine, passed to UTF8::substr().</p>
   *
   * @return int|null
   *                  <strong>null</strong> if $charList is empty, if the (cut) string is empty,
   *                  if UTF8::substr() fails or if the length can not be determined.
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ('' === $charList) {
      return null;
    }

    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        return null;
      }
      $str = (string)$strTmp;
    }

    if ('' === $str) {
      return null;
    }

    // use a dedicated variable for the matches instead of overwriting the int parameter $length
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      $segmentLength = self::strlen($matches[1]);
    } else {
      $segmentLength = self::strlen($str);
    }

    // UTF8::strlen() can return false, but only int|null is documented for this method
    return $segmentLength === false ? null : $segmentLength;
  }
7539
7540
  /**
   * Alias for "UTF8::stristr()": all arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack      <p>Forwarded to UTF8::stristr().</p>
   * @param string $needle        <p>Forwarded to UTF8::stristr().</p>
   * @param bool   $before_needle [optional] <p>Forwarded to UTF8::stristr().</p>
   * @param string $encoding      [optional] <p>Forwarded to UTF8::stristr().</p>
   * @param bool   $cleanUtf8     [optional] <p>Forwarded to UTF8::stristr().</p>
   *
   * @return string|false <p>The result of UTF8::stristr().</p>
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7557
7558
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry of the array is passed to UTF8::chr() and the results are
   * concatenated in the given order.
   *
   * @see UTF8::chr()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array): string
  {
    $toChar = [self::class, 'chr'];
    $chars = \array_map($toChar, $array);

    return \implode('', $chars);
  }
7580
7581
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The known BOMs are the keys of self::$BOM; the string has a BOM as soon as
   * one of them is found at byte position 0.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    $hasBom = false;

    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        $hasBom = true;

        break;
      }
    }

    return $hasBom;
  }
7600
7601
  /**
   * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string      $str            <p>The input string.</p>
   * @param string|null $allowable_tags [optional] <p>
   *                                    Tags which should not be stripped.
   *                                    </p>
   *                                    <p>
   *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                    can not be changed with allowable_tags.
   *                                    </p>
   * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // only run the (optional) clean-up when it was requested explicitly
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
7633
7634
  /**
   * Strip all whitespace characters. This includes tabs and newline
   * characters, as well as multibyte whitespace such as the thin space
   * and ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without whitespace; an empty string if the replacement fails.</p>
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    // preg_replace() yields null on error, the cast turns that into ''
    return (string)$stripped;
  }
7651
7652
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * The search strategies are tried in this order, the first one that finds the
   * needle wins: mbstring, intl (UTF-8 and non-negative offset only), native
   * stripos() for pure ASCII input, and finally case-folding both strings and
   * searching with UTF8::strpos().
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
   *                   haystack string,<br> or <strong>false</strong> if needle is not found.
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // 1st try: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $mbPosition = \mb_stripos($haystack, $needle, $offset, $encoding);
      if (false !== $mbPosition) {
        return $mbPosition;
      }
    }

    //
    // 2nd try: intl
    //
    // INFO: "grapheme_stripos()" can't handle other encodings and no negative offset
    //

    if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
      $graphemePosition = \grapheme_stripos($haystack, $needle, $offset);
      if (false !== $graphemePosition) {
        return $graphemePosition;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
  }
7725
7726
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * The search strategies are tried in this order: mbstring, intl (UTF-8 only),
   * native stristr() for pure ASCII input and finally a regular expression.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The clean-up can leave an empty needle behind. Compare against '' on purpose:
    // a loose check would also treat the valid needle "0" as empty.
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // $match[1] captures everything in front of the first (case insensitive) occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // UTF8::strlen() can return false, which is not a valid offset for UTF8::substr()
    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
7804
7805
  /**
   * Get the string length, not the byte-length!
   *
   * The length is determined by the first strategy that delivers a result:
   * byte length for "CP850" / "ASCII", mbstring, iconv, intl (UTF-8 only),
   * native strlen() for pure ASCII input and finally a regular expression.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($str === '') {
      return 0;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // "mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // neither mbstring nor iconv can help with a non UTF-8 encoding -> warn, but keep going
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $mbLength = \mb_strlen($str, $encoding);
      if (false !== $mbLength) {
        return $mbLength;
      }
    }

    //
    // fallback via iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
      $iconvLength = \iconv_strlen($str, $encoding);
      if (false !== $iconvLength) {
        return $iconvLength;
      }
    }

    //
    // fallback via intl
    //
    // INFO: "grapheme_strlen()" can't handle other encodings
    //

    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $graphemeLength = \grapheme_strlen($str);
      if (null !== $graphemeLength) {
        return $graphemeLength;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    //
    // fallback via vanilla php
    //

    \preg_match_all('/./us', $str, $chars);
    $charCount = \count($chars[0]);

    // nothing matched although the string is not empty -> the length is unknown
    return ($charCount === 0 && isset($str[0])) ? false : $charCount;
  }
7920
7921
  /**
   * Get string length in byte.
   *
   * Uses mb_strlen() with the 8-bit charset "CP850" when
   * self::$SUPPORT['mbstring_func_overload'] is set, the native strlen() otherwise.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int <p>The number of bytes.</p>
   */
  public static function strlen_in_byte(string $str): int
  {
    if ('' === $str) {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strlen($str);
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_strlen($str, 'CP850'); // 8-BIT
  }
7945
7946
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp(); both strings are passed
   * through UTF8::strtocasefold() and then compared via UTF8::strnatcmp().
   *
   * @see UTF8::strnatcmp()
   * @see UTF8::strtocasefold()
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded1, $folded2);
  }
7967
7968
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp(); identical strings are equal
   * right away, otherwise both strings are passed through UTF8::strtonatfold()
   * and compared with the native strnatcmp().
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return \strnatcmp($folded1, $folded2);
  }
7987
7988
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared
   * via UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @see UTF8::strncmp()
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param int    $len      <p>The length of strings to be used in the comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($folded1, $folded2, $len);
  }
8011
8012
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut to their first $len characters via UTF8::substr()
   * and then compared via UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @see UTF8::strcmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // a failed substr() (false) is turned into an empty string by the cast
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
8033
8034
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>String starting from the character found, or false if it is not found.</p>
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $found)) {
      return false;
    }

    // locate the matched character again to cut the haystack at its position
    $cutPosition = (int)\strpos($haystack, $found[0]);

    return \substr($haystack, $cutPosition);
  }
8056
8057
  /**
   * Find position of first occurrence of string in a string.
   *
   * The search strategies are tried in this order, the first one that finds the
   * needle wins: byte search for "CP850" / "ASCII", mbstring, intl (UTF-8 and
   * non-negative offset only), iconv (non-negative offset only), native strpos()
   * for pure ASCII input and finally a search on the cut haystack.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If needle is not found it returns false.
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strpos_in_byte($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // Resolve the offset to an absolute start position BEFORE the haystack is cut:
    // a negative offset counts from the end, but the result must still be relative
    // to the beginning of the original haystack.
    if ($offset < 0) {
      $haystackLength = $haystackIsAscii ? \strlen($haystack) : (int)self::strlen($haystack, $encoding);
      $start = \max(0, $haystackLength + $offset);
    } else {
      $start = $offset;
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    // byte position of the needle inside of the cut haystack
    $pos = \strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    if ($pos === 0) {
      return $start;
    }

    // convert the byte position into a character position and add the start position
    return $start + (int)self::strlen(\substr($haystackTmp, 0, $pos), $encoding);
  }
8207
8208
  /**
   * Find the byte position of the first occurrence of a string in a string.
   *
   * Uses mb_strpos() with the 8-bit charset "CP850" when
   * self::$SUPPORT['mbstring_func_overload'] is set, the native strpos() otherwise.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The search offset. If it is not specified, 0 is used.
   *                         </p>
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string. If needle is not found (or if one of the
   *                   strings is empty), it returns false.
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strpos($haystack, $needle, $offset);
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_strpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
  }
8241
8242
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack. The non-mbstring fallbacks only
   *                              use its first character.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return the part of haystack before the last occurrence of needle.<br>
   *                              <b>false</b>: return the part of haystack from the last occurrence of needle
   *                              to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    $isByteEncoding = ($encoding === 'CP850' || $encoding === 'ASCII');
    if ($isByteEncoding && $before_needle === false) {
      return \strrchr($haystack, $needle);
    }

    //
    // fallback via iconv or vanilla php: both only search for the first character of the needle
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    if (self::$SUPPORT['iconv'] === true) {
      $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
      $lastPos = self::strrpos($haystack, $needle, null, $encoding);
    }

    if ($lastPos === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
  }
8355
8356
  /**
   * Reverses characters order in the string.
   *
   * The string is walked from its last character to its first one, using the
   * character-aware helpers of this class (not single bytes).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with characters in the reverse sequence. An empty string is returned
   *                for empty input or when the length of the input cannot be determined.
   */
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $length = self::strlen($str);
    if ($length === false) {
      // The length helper may report a failure with "false": do not use a boolean
      // as loop counter / substr() offset, just return the empty result.
      return '';
    }

    $reversed = '';
    for ($i = $length - 1; $i >= 0; --$i) {
      $reversed .= self::substr($str, $i, 1);
    }

    return $reversed;
  }
8377
8378
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack. The non-mbstring fallback only
   *                               uses its first character.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return the part of haystack before the last occurrence of needle.<br>
   *                               <b>false</b>: return the part of haystack from the last occurrence of needle
   *                               to the end.
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found.
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via vanilla php: search for the first character of the needle only
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    $lastPos = self::strripos($haystack, $needle, 0, $encoding);
    if ($lastPos === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
  }
8448
8449
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for. A non-negative integer is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '') {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback via intl
    //
    // INFO: "grapheme_strripos()" can't handle other encodings than UTF-8 and no negative offset
    //

    if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
      $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
      if ($graphemePos !== false) {
        return $graphemePos;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: case-fold both strings, then do a case-sensitive search
    //

    return self::strrpos(
        self::strtocasefold($haystack, true, false, $encoding),
        self::strtocasefold($needle, true, false, $encoding),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
8554
8555
  /**
   * Finds position of last occurrence of a string within another, case insensitive (8-bit / byte based).
   *
   * @param string $haystack <p>
   *                         The string from which to get the position of the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The position in haystack
   *                         to start searching.
   *                         </p>
   *
   * @return int|false Return the numeric position of the last occurrence of needle in the
   *                   haystack string, or false if needle is not found or if haystack or needle
   *                   is an empty string.
   */
  public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strripos($haystack, $needle, $offset);
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_strripos($haystack, $needle, $offset, 'CP850'); // 8-BIT
  }
8590
8591
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.<br>
   *                              <b>null</b> (default) is handed to the mbstring / byte functions as 0; the intl and
   *                              the ascii-only fallbacks are skipped in that case.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      // strrpos_in_byte() declares a non-nullable "int $offset", so a null offset must be mapped to 0 here
      return self::strrpos_in_byte($haystack, $needle, $offset ?? 0);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      // never hand null to the integer offset parameter of mb_strrpos()
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // Cut the haystack down to the searched part first:
    // - positive offset: drop the leading characters and add the offset to the result again
    // - negative offset: drop the trailing characters, the result needs no correction
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = self::strrpos_in_byte($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position
    return (int)$offset + (int)self::strlen(self::substr_in_byte($haystack, 0, $pos));
  }
8726
8727
  /**
   * Find position of last occurrence of a string in a string (8-bit / byte based).
   *
   * @param string $haystack <p>
   *                         The string being checked, for the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
   *                         the string. Negative values will stop searching at an arbitrary point
   *                         prior to the end of the string.
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found, or if haystack or needle
   *                   is an empty string, it returns false.
   */
  public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strrpos($haystack, $needle, $offset);
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_strrpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
  }
8761
8762
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>Start of the examined part of the string (handed to UTF8::substr()).</p>
   * @param int|null $length [optional] <p>Length of the examined part of the string (handed to UTF8::substr()).</p>
   *
   * @return int The length of the initial segment, 0 if the (sub-)string or the mask is empty
   *             or if the first character is not part of the mask.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // Use a dedicated variable for the matches instead of overwriting the subject string.
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    // The length helper may report "false"; the declared return type is int.
    return (int)self::strlen($matches[0]);
  }
8789
8790
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //
    // INFO: "grapheme_strstr()" can't handle other encodings than UTF-8
    //

    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($graphemeResult !== false) {
        return $graphemeResult;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture everything in front of the first occurrence of the needle
    //

    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $beforeNeedle = $match[1];

    return $before_needle
        ? $beforeNeedle
        : self::substr($haystack, self::strlen($beforeNeedle));
  }
8893
8894
  /**
   * Finds first occurrence of a string within another (8-bit / byte based).
   *
   * @param string $haystack      <p>
   *                              The string from which to get the first occurrence
   *                              of needle.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to find in haystack.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the first occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the first occurrence of needle to the end,
   *                              </p>
   *
   * @return string|false The portion of haystack,
   *                      or false if needle is not found or if haystack or needle is an empty string.
   */
  public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strstr($haystack, $needle, $before_needle);
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_strstr($haystack, $needle, $before_needle, 'CP850'); // 8-BIT
  }
8933
8934
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is first passed through the case-folding helper of this class and
   * afterwards converted to lowercase or uppercase, depending on $lower.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string      $str       <p>The input string.</p>
   * @param bool        $full      [optional] <p>
   *                               <b>true</b>, replace full case folding chars (default)<br>
   *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                               </p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase is
   *                               for some languages better ...</p>
   *
   * @return string
   */
  public static function strtocasefold(
      string $str,
      bool $full = true,
      bool $cleanUtf8 = false,
      string $encoding = 'UTF-8',
      string $lang = null,
      $lower = true
  ): string
  {
    if ($str === '') {
      return '';
    }

    $folded = self::fixStrCaseHelper($str, $lower, $full);

    if ($lower !== true) {
      return self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
    }

    return self::strtolower($folded, $encoding, $cleanUtf8, $lang);
  }
8973
8974
  /**
   * Make a string lowercase.
   *
   * Without $lang the conversion is done by mb_strtolower(). With $lang the "intl" transliterator
   * is used when available; otherwise a warning is triggered and mb_strtolower() is used as well.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the string is converted
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    if ($lang === null) {
      // always fallback via symfony polyfill
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true) {
      \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      // always fallback via symfony polyfill
      return \mb_strtolower($str, $encoding);
    }

    // language specific transliterator id, e.g. "tr-Lower"
    $transliteratorId = $lang . '-Lower';
    if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

      $transliteratorId = 'Any-Lower';
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return transliterator_transliterate($transliteratorId, $str);
  }
9036
9037
  /**
   * Remove combining marks from a string, for accent-insensitive collation matching.
   *
   * <p>The input is decomposed with Unicode normalization form D, then every run of
   * non-spacing marks (\p{Mn}) is stripped.</p>
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return \preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
9049
9050
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr.
   *                                           Needs the "intl" extension, otherwise a warning is triggered and
   *                                           the generic conversion is used.</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // the parameter is untyped on purpose, so normalise it to a string first
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl IS available in this branch; the language-specific transliterator is missing
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language-specific transliterator: use the generic one
          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = transliterator_transliterate($langCode, $str);

        // transliterator_transliterate() returns false on failure; returning that
        // from a method declared ": string" would be a type error, so only a real
        // string is returned here and a failure falls through to mb_strtoupper()
        if ($transliterated !== false) {
          return $transliterated;
        }

      } else {

        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      }
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
9112
9113
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The characters to replace, or (when $to is omitted) a replace-pairs
   *                              array as accepted by the native strtr().</p>
   * @param string|string[] $to   <p>The replacement characters. If it is longer or shorter than $from,
   *                              the surplus characters of the longer one are ignored.</p>
   *
   * @return string
   *                A copy of str in which each character of $from is replaced by the character at the
   *                same position in $to. If $to is omitted and $from is a string, every occurrence of
   *                $from is removed from str.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // identical source and target: nothing would change
    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);
      $fromCount = \count($fromChars);
      $toCount = \count($toChars);

      // cut the longer list down so both sides pair up one-to-one
      if ($fromCount > $toCount) {
        $fromChars = \array_slice($fromChars, 0, $toCount);
      } elseif ($fromCount < $toCount) {
        $toChars = \array_slice($toChars, 0, $fromCount);
      }

      $from = \array_combine($fromChars, $toChars);
    }

    // a plain string can only reach this point when no $to was given
    return \is_string($from)
        ? \str_replace($from, '', $str)
        : \strtr($str, $from);
  }
9157
9158
  /**
   * Return the display width of a string.
   *
   * <p>Uses mb_strwidth() when mbstring is available. Otherwise characters from the
   * wide ranges listed in the fallback pattern count as two columns and every
   * remaining character as one.</p>
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ('' === $str) {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred path: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strwidth($str, $encoding);
    }

    // vanilla php path: the pattern below needs UTF-8 input
    if ('UTF-8' !== $encoding) {
      $str = self::encode('UTF-8', $str, false, $encoding);
    }

    $widePattern = '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u';

    // strip the wide characters and remember how many were removed
    $wideCount = 0;
    $narrowOnly = (string)\preg_replace($widePattern, '', $str, -1, $wideCount);

    // two columns per wide character + one per remaining character
    return ($wideCount << 1) + self::strlen($narrowOnly, 'UTF-8');
  }
9208
9209
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string $str       <p>The string being checked.</p>
   * @param int    $offset    <p>The first position used in str.</p>
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters. In the fallback path an <i>offset</i> behind the
   *                      end of <i>str</i> yields an empty string, and <b>FALSE</b> is returned if
   *                      the string length cannot be determined.
   */
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // an empty input or an explicit zero length can only produce an empty string
    if ('' === $str || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // no offset and no length: the whole string was requested
    if (!$offset && $length === null) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // binary || ascii only: slice by bytes
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
      return self::substr_in_byte($str, $offset, $length);
    }

    //
    // preferred path: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      // "2147483647" stands in for "until the end" so no strlen call is needed
      $viaMbstring = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
      if ($viaMbstring !== false) {
        return $viaMbstring;
      }
    }

    // otherwise we need the string-length and can't fake it via "2147483647"
    $strLength = 0;
    if ($offset || $length === null) {
      $strLength = self::strlen($str, $encoding);
    }

    // e.g.: invalid chars + mbstring not installed
    if ($strLength === false) {
      return false;
    }

    // offset sits exactly at the end and no length was given
    // ($length can not be 0 here, so "no length" means null)
    if ($offset === $strLength && $length === null) {
      return '';
    }

    // offset behind the end of the string
    if ($offset && $offset > $strLength) {
      // "false" is the php native return type here,
      //  but we optimized this for performance ... see "2147483647" instead of "strlen"
      return '';
    }

    // from here on a concrete length is required
    $length = $length ?? (int)$strLength;

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_substr() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $viaIntl = \grapheme_substr($str, $offset, $length);
      if ($viaIntl !== false) {
        return $viaIntl;
      }
    }

    //
    // fallback via iconv
    //

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $viaIconv = \iconv_substr($str, $offset, $length);
      if ($viaIconv !== false) {
        return $viaIconv;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    //
    // fallback via vanilla php
    //

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($chars, $offset, $length));
  }
9366
9367
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * <p>If an offset or a length is given, str1 is first reduced to that portion and
   * str2 is truncated to the same number of characters; the two results are then
   * compared.</p>
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if (
        $offset !== 0
        ||
        $length !== null
    ) {
      $str1Tmp = self::substr($str1, $offset, $length);
      if ($str1Tmp === false) {
        $str1Tmp = '';
      }
      $str1 = (string)$str1Tmp;

      // self::strlen() may return false, but substr() only accepts int|null as length:
      // cast explicitly instead of relying on implicit coercion
      $str1Length = (int)self::strlen($str1);

      $str2Tmp = self::substr($str2, 0, $str1Length);
      if ($str2Tmp === false) {
        $str2Tmp = '';
      }
      $str2 = (string)$str2Tmp;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
9410
9411
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack   <p>The string to search in.</p>
   * @param string $needle     <p>The substring to search for.</p>
   * @param int    $offset     [optional] <p>The offset where to start counting.</p>
   * @param int    $length     [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring.
   *                           </p>
   * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false The number of occurrences, or false if haystack or needle is empty.
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure in the caller's encoding: the same encoding is used for the
        // self::substr() call below, so both must count the same units
        $lengthTmp = self::strlen($haystack, $encoding);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // only the requested window of the haystack is searched
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // vanilla php fallback: count the matches of the quoted needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
9504
9505
  /**
   * Count the number of substring occurrences, working on bytes.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string being found.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The offset where to start counting
   *                         </p>
   * @param int    $length   [optional] <p>
   *                         The maximum length after the specified offset to search for the
   *                         substring.
   *                         </p>
   *
   * @return int|false The number of times the
   *                   needle substring occurs in the
   *                   haystack string; 0 if haystack or needle is empty.
   */
  public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
  {
    if ($haystack === '' || $needle === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {

      if ($offset || $length !== null) {

        if ($length === null) {
          // the window below is cut in bytes (substr_in_byte), so the default length
          // must be the byte length too; "mb_" with an 8-bit charset counts bytes
          // even while the native string functions are overloaded
          $lengthTmp = \mb_strlen($haystack, 'CP850'); // 8-BIT
          if ($lengthTmp === false) {
            return false;
          }
          $length = (int)$lengthTmp;
        }

        if (
            (
                $length !== 0
                &&
                $offset !== 0
            )
            &&
            ($length + $offset) <= 0
            &&
            Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
        ) {
          return false;
        }

        $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
        if ($haystackTmp === false) {
          $haystackTmp = '';
        }
        $haystack = (string)$haystackTmp;
      }

      // "mb_" is available if overload is used, so use it ...
      return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    // never hand a null length to the native function: older PHP versions
    // do not accept null for that parameter, so omit the argument instead
    if ($length === null) {
      return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
  }
9579
9580
  /**
   * Returns the number of occurrences of $substring in the given string.
   * By default, the comparison is case-sensitive, but can be made insensitive
   * by setting $caseSensitive to false.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true.
   *                              A non-boolean value is used as the encoding instead.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ('' === $str || '' === $substring) {
      return 0;
    }

    // only a fallback to prevent BC in the api ...
    // a non-boolean third argument is taken as the encoding
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    if (!$caseSensitive) {
      // fold both sides the same way so the plain count below ignores case
      $str = self::strtocasefold($str, true, false, $encoding, null, false);
      $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
9610
9611
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) === true) {
      // self::strlen() may return false, but substr() only accepts an int offset:
      // cast explicitly instead of relying on implicit coercion
      $needleLength = (int)self::strlen($needle);

      $haystackTmp = self::substr($haystack, $needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
9639
9640
  /**
   * Get part of a string, processed in bytes.
   *
   * @param string $str    <p>The string being checked.</p>
   * @param int    $offset <p>The first position used in str.</p>
   * @param int    $length [optional] <p>The maximum length of the returned string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters, as returned by the underlying
   *                      substr() / mb_substr() call.
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    // an empty input or an explicit zero length can only produce an empty string
    if ($str === '' || $length === 0) {
      return '';
    }

    // no offset and no length: the whole string was requested
    if (!$offset && $length === null) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "2147483647" stands in for "until the end of the string"
    $byteLength = $length ?? 2147483647;

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...
      return \mb_substr($str, $offset, $byteLength, 'CP850'); // 8-BIT
    }

    return \substr($str, $offset, $byteLength);
  }
9679
9680
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the suffix, or the unchanged haystack if it does not
   *                end with the needle.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $trimmed = self::substr($haystack, 0, $keepLength);

    return $trimmed === false ? '' : (string)$trimmed;
  }
9708
9709
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) === true) {
      // self::strlen() may return false, but substr() only accepts an int offset:
      // cast explicitly instead of relying on implicit coercion
      $needleLength = (int)self::strlen($needle);

      $haystackTmp = self::substr($haystack, $needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
9737
9738
  /**
9739
   * Replace text within a portion of a string.
9740
   *
9741
   * source: https://gist.github.com/stemar/8287074
9742
   *
9743
   * @param string|string[] $str              <p>The input string or an array of stings.</p>
9744
   * @param string|string[] $replacement      <p>The replacement string or an array of stings.</p>
9745
   * @param int|int[]       $offset           <p>
9746
   *                                          If start is positive, the replacing will begin at the start'th offset
9747
   *                                          into string.
9748
   *                                          <br><br>
9749
   *                                          If start is negative, the replacing will begin at the start'th character
9750
   *                                          from the end of string.
9751
   *                                          </p>
9752
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
9753
   *                                          portion of string which is to be replaced. If it is negative, it
9754
   *                                          represents the number of characters from the end of string at which to
9755
   *                                          stop replacing. If it is not given, then it will default to strlen(
9756
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
9757
   *                                          length is zero then this function will have the effect of inserting
9758
   *                                          replacement into string at the given start offset.</p>
9759
   * @param string          $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
9760
   *
9761
   * @return string|string[] The result string is returned. If string is an array then array is returned.
9762
   */
9763 10
  public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
9764
  {
9765 10
    if (\is_array($str) === true) {
9766 1
      $num = \count($str);
9767
9768
      // the replacement
9769 1
      if (\is_array($replacement) === true) {
9770 1
        $replacement = \array_slice($replacement, 0, $num);
9771
      } else {
9772 1
        $replacement = \array_pad([$replacement], $num, $replacement);
9773
      }
9774
9775
      // the offset
9776 1
      if (\is_array($offset) === true) {
9777 1
        $offset = \array_slice($offset, 0, $num);
9778 1
        foreach ($offset as &$valueTmp) {
9779 1
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
9780
        }
9781 1
        unset($valueTmp);
9782
      } else {
9783 1
        $offset = \array_pad([$offset], $num, $offset);
9784
      }
9785
9786
      // the length
9787 1
      if (null === $length) {
9788 1
        $length = \array_fill(0, $num, 0);
9789 1
      } elseif (\is_array($length) === true) {
9790 1
        $length = \array_slice($length, 0, $num);
9791 1
        foreach ($length as &$valueTmpV2) {
9792 1
          if (null !== $valueTmpV2) {
9793 1
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
9794
          } else {
9795 1
            $valueTmpV2 = 0;
9796
          }
9797
        }
9798 1
        unset($valueTmpV2);
9799
      } else {
9800 1
        $length = \array_pad([$length], $num, $length);
9801
      }
9802
9803
      // recursive call
9804 1
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
9805
    }
9806
9807 10
    if (\is_array($replacement) === true) {
9808 1
      if (\count($replacement) > 0) {
9809 1
        $replacement = $replacement[0];
9810
      } else {
9811 1
        $replacement = '';
9812
      }
9813
    }
9814
9815
    // init
9816 10
    $str = (string)$str;
9817 10
    $replacement = (string)$replacement;
9818
9819 10
    if ('' === $str) {
9820 1
      return $replacement;
9821
    }
9822
9823 9
    if (self::is_ascii($str)) {
9824 6
      return ($length === null) ?
9825
          \substr_replace($str, $replacement, $offset) :
0 ignored issues
show
Bug introduced by
It seems like $offset can also be of type integer[]; however, parameter $start of substr_replace() does only seem to accept integer, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

9825
          \substr_replace($str, $replacement, /** @scrutinizer ignore-type */ $offset) :
Loading history...
9826 6
          \substr_replace($str, $replacement, $offset, $length);
0 ignored issues
show
Bug introduced by
It seems like $length can also be of type integer[]; however, parameter $length of substr_replace() does only seem to accept integer, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

9826
          \substr_replace($str, $replacement, $offset, /** @scrutinizer ignore-type */ $length);
Loading history...
9827
    }
9828
9829 8
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9830
      self::checkForSupport();
9831
    }
9832
9833 8
    if (self::$SUPPORT['mbstring'] === true) {
9834 8
      $string_length = self::strlen($str, $encoding);
9835
9836 8
      if ($offset < 0) {
9837 1
        $offset = \max(0, $string_length + $offset);
9838 8
      } elseif ($offset > $string_length) {
9839
        $offset = $string_length;
9840
      }
9841
9842 8
      if ($length < 0) {
9843 1
        $length = \max(0, $string_length - $offset + $length);
9844 8
      } elseif ($length === null || $length > $string_length) {
9845 3
        $length = $string_length;
9846
      }
9847
9848 8
      if (($offset + $length) > $string_length) {
9849 3
        $length = $string_length - $offset;
9850
      }
9851
9852 8
      return self::substr($str, 0, $offset, $encoding) . $replacement . self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
0 ignored issues
show
Bug introduced by
Are you sure self::substr($str, 0, $offset, $encoding) of type false|string can be used in concatenation? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

9852
      return /** @scrutinizer ignore-type */ self::substr($str, 0, $offset, $encoding) . $replacement . self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
Loading history...
Bug introduced by
Are you sure self::substr($str, $offs...t - $length, $encoding) of type string|false can be used in concatenation? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

9852
      return self::substr($str, 0, $offset, $encoding) . $replacement . /** @scrutinizer ignore-type */ self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
Loading history...
9853
    }
9854
9855
    \preg_match_all('/./us', $str, $smatches);
9856
    \preg_match_all('/./us', $replacement, $rmatches);
9857
9858
    if ($length === null) {
9859
      $lengthTmp = self::strlen($str, $encoding);
9860
      if ($lengthTmp === false) {
9861
        // e.g.: non mbstring support + invalid chars
9862
        return '';
9863
      }
9864
      $length = (int)$lengthTmp;
9865
    }
9866
9867
    \array_splice($smatches[0], $offset, $length, $rmatches[0]);
0 ignored issues
show
Bug introduced by
It seems like $length can also be of type integer[]; however, parameter $length of array_splice() does only seem to accept integer, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

9867
    \array_splice($smatches[0], $offset, /** @scrutinizer ignore-type */ $length, $rmatches[0]);
Loading history...
Bug introduced by
It seems like $offset can also be of type integer[]; however, parameter $offset of array_splice() does only seem to accept integer, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

9867
    \array_splice($smatches[0], /** @scrutinizer ignore-type */ $offset, $length, $rmatches[0]);
Loading history...
9868
9869
    return \implode('', $smatches[0]);
9870
  }
9871
9872
  /**
   * Strips the suffix ($needle) from the end of the string ($haystack), if it is present.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string The $haystack without the trailing $needle, or the unchanged $haystack
   *                if it does not end with $needle.
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    // nothing to strip from, or nothing to strip
    if ('' === $haystack || '' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $prefix = self::substr($haystack, 0, $keepLength);

    // "substr()" may report a failure via false, which is mapped to an empty string
    return $prefix === false ? '' : (string)$prefix;
  }
9900
9901
  /**
   * Returns a case swapped version of the string.
   *
   * The result is built by XOR-ing the lower-cased string, the upper-cased string and the input
   * byte by byte.
   * NOTE(review): this presumably relies on both case variants having the same byte length as
   * the input - confirm for characters whose case mapping changes the length.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // only unknown encoding names need to be normalized
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // (lower ^ upper) is non-zero only where the case differs, XOR-ing the input flips exactly those bytes
    return (string)($lower ^ $upper ^ $str);
  }
9928
9929
  /**
   * Checks whether "mbstring" or "iconv" functions are provided by a polyfill
   * (presumably the Symfony polyfills) instead of the native PHP extension.
   *
   * @return bool
   *              <strong>true</strong> if "mb_strlen()" or "iconv()" exists although the matching
   *              extension is not loaded, <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    // the function exists, but the extension is missing => it has to come from userland code
    $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
    $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

    return $mbstringIsPolyfilled || $iconvIsPolyfilled;
  }
9952
9953
  /**
   * Replaces every tab character with a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab.</p>
   *
   * @return string The string with all tabs expanded.
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9963
9964
  /**
   * Converts the first character of each word in the string to uppercase
   * and all other chars to lowercase.
   *
   * The work is delegated to "UTF8::str_titleize()" after the encoding name was normalized.
   *
   * @param string      $str                   <p>The input string.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // "UTF-8" and "CP850" are passed through as they are, every other name gets normalized
    $needsNormalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return self::str_titleize(
        $str,
        null,
        $encoding,
        $cleanUtf8,
        $lang,
        $tryToKeepStringLength,
        false
    );
  }
9984
9985
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr <p>Forwarded as the replacement for unknown characters.</p>
   * @param bool   $strict    <p>Forwarded as the "strict" flag.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
10002
10003
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
10018
10019
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
10034
10035
  /**
   * alias for "UTF8::to_utf8()"
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
10050
10051
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned unchanged. Everything else is passed through "UTF8::clean()" and
   * then transliterated character by character via lookup tables that are loaded per 256 code point
   * "bank" with "getData()". Characters without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // cache of the already loaded transliteration tables, indexed by bank (code point >> 8)
    static $UTF8_TO_ASCII;

    if ('' === $str) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $strTmp = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: keep the input in
        // that case, so that the table based fallback below still produces a result
        if ($strTmp !== false) {
          $str = $strTmp;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }

      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // 0xFE and 0xFF can never start a sequence, so there is nothing to decode
      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // reset the code point for every character, otherwise a value that was decoded
      // for a previous character would be reused if the lead byte matches no range below
      $ord = null;

      $ordC1 = self::$ORD[$c[1]];

      // 2 bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3 bytes
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4 bytes
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5 bytes
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6 bytes
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // the lead byte matched none of the ranges above
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the table ("bank"), the lowest 8 bits the entry inside of it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // no table for this bank: remember that, so that the lookup is not repeated
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference, so that "$c" can not modify the last element by accident
    unset($c);

    return \implode('', $chars);
  }
10212
10213
  /**
   * Converts a value into a boolean, based on its string representation.
   *
   * The (case-insensitive) words "true", "1", "on", "yes" result in true and
   * "false", "0", "off", "no" result in false. Other numeric strings are true if they are
   * greater than zero, and every remaining value is true if it is not empty after trimming.
   *
   * @param mixed $str <p>The value to convert, it is cast to a string first.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    $value = (string)$str;

    if ('' === $value) {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $normalized = \strtolower($value);

    if (\in_array($normalized, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($normalized, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($value)) {
      return (((float)$value + 0) > 0);
    }

    return (bool)self::trim($value);
  }
10252
10253
  /**
   * Convert given string to safe filename (and keep string case).
   *
   * Every character except letters (a-z, A-Z), digits, ".", "-", whitespace and the $fallback_char
   * is removed. Whitespace runs are replaced by the $fallback_char, repeated $fallback_char's are
   * collapsed and the $fallback_char is trimmed from both ends of the result.
   *
   * @param string $string            <p>The input string.</p>
   * @param bool   $use_transliterate [optional] <p>true === pass the string through
   *                                  "UTF8::str_transliterate()" first, by default no transliteration is
   *                                  done and the unsafe characters are simply removed.</p>
   * @param string $fallback_char     [optional] <p>The replacement for whitespace, an empty string
   *                                  removes the whitespace.</p>
   *
   * @return string
   */
  public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
  {
    if ($use_transliterate === true) {
      $string = self::str_transliterate($string, $fallback_char);
    }

    $fallback_char_escaped = \preg_quote($fallback_char, '/');

    $patterns = [
        '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
        '/[\s]+/',                                            // 2) convert spaces to $fallback_char
    ];
    $replacements = [
        '',
        $fallback_char,
    ];

    // An empty $fallback_char would result in the invalid pattern "/[]+/": "preg_replace()" then
    // returns null and the whole filename would be lost, so this step is skipped in that case.
    if ($fallback_char !== '') {
      $patterns[] = '/[' . $fallback_char_escaped . ']+/';    // 3) remove double $fallback_char's
      $replacements[] = $fallback_char;
    }

    $string = (string)\preg_replace($patterns, $replacements, $string);

    if ($fallback_char === '') {
      // nothing to trim
      return $string;
    }

    // trim "$fallback_char" from beginning and end of the string
    return \trim($string, $fallback_char);
  }
10288
10289
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), the keys are kept as they are.
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      $converted = [];
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $text = (string)$str;

    return '' === $text ? '' : self::utf8_decode($text);
  }
10313
10314
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
10327
10328
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences (e.g. "\u00e4") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element, the keys are kept
    if (\is_array($str) === true) {
      $converted = [];
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $converted;
    }

    $str = (string)$str;
    if ('' === $str) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $out = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        if (($lead & "\xC0") === "\x80") {
          // a continuation byte without a lead byte - needs conversion
          $out .= self::to_utf8_convert_helper($lead);
        } else {
          // it doesn't need conversion
          $out .= $lead;
        }

        ++$pos;

        continue;
      }

      // number of continuation bytes the lead byte asks for (0 === doesn't look like UTF8 at all)
      if ($lead <= "\xDF") {
        $expected = 1;
      } elseif ($lead <= "\xEF") {
        $expected = 2;
      } elseif ($lead <= "\xF7") {
        $expected = 3;
      } else {
        $expected = 0;
      }

      $sequence = $lead;
      $looksLikeUtf8 = $expected > 0;
      for ($k = 1; $looksLikeUtf8 && $k <= $expected; ++$k) {
        // bytes behind the end of the string count as "\x00", which is never a continuation byte
        $next = $pos + $k >= $byteCount ? "\x00" : $str[$pos + $k];

        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;
        } else {
          $sequence .= $next;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $out .= $sequence;
        $pos += $expected + 1;
      } else {
        // not valid UTF8 - convert only the lead byte, the following bytes are checked on their own
        $out .= self::to_utf8_convert_helper($lead);
        ++$pos;
      }
    }

    // decode unicode escape sequences
    $out = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $out
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $out = self::html_entity_decode($out);
    }

    return $out;
  }
10437
10438
  /**
   * Converts a single byte, that is not part of a valid UTF-8 sequence, into UTF-8.
   *
   * The byte is looked up in the "win1252_to_utf8" table first. If there is no entry, a 2-byte
   * UTF-8 sequence is built by hand, so that the byte value is used as the code point.
   *
   * @param int|string $input <p>The byte to convert, it is used as key of the "ord" lookup table.</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Build "110xxxxx 10xxxxxx" by hand. The shift is used instead of "/ 64", because the division
    // results in a float for most values and a float as array key is truncated implicitly
    // (deprecated since PHP 8.1).
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ((string)$input & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
10471
10472
  /**
10473
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
10474
   *
10475
   * INFO: This is slower than "trim()"
10476
   *
10477
   * We can only use the original function if the string / chars contain only 7-bit characters,
10478
   * but the check for ASCII (7-bit) costs more time than we can save here.
10479
   *
10480
   * @param string $str   <p>The string to be trimmed</p>
10481
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
10482
   *
10483
   * @return string The trimmed string.
10484
   */
10485 214
  public static function trim(string $str = '', $chars = INF): string
  {
    // Nothing can be stripped from an empty string.
    if ('' === $str) {
      return '';
    }

    // "$chars" counts as "not given" for the INF sentinel and for empty values.
    // The string "0" is falsy in PHP, so a plain "!$chars" check would wrongly
    // discard it although it is a valid one-character list; it is excluded here.
    $useDefaultChars = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      // Default: strip leading / trailing chars of the Unicode categories
      // "Separator" (\pZ) and "Other" (\pC).
      $pattern = '^[\pZ\pC]+|[\pZ\pC]+$';
    } else {
      // The chars are placed inside a character class, so escape everything that
      // is special for the regex (including the "/" delimiter).
      $quotedChars = \preg_quote($chars, '/');
      $pattern = '^[' . $quotedChars . ']+|[' . $quotedChars . ']+$';
    }

    // Every match at the beginning or the end is replaced with an empty string.
    return self::regex_replace($str, $pattern, '', '', '/');
  }
10501
10502
  /**
10503
   * Makes string's first char uppercase.
10504
   *
10505
   * @param string      $str                   <p>The input string.</p>
10506
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
10507
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
10508
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
10509
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
10510
   *
10511
   * @return string The resulting string.
10512
   */
10513 81
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // Optional clean-up first: "mb_strpos()" and "iconv_strpos()" return a wrong
    // position if invalid characters are found in the string.
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Everything behind the first character is kept as it is;
    // a failed "substr()" (false) simply means there is no tail.
    $tail = self::substr($str, 1, null, $encoding);
    $tail = $tail === false ? '' : $tail;

    // Only the first character is passed through the uppercase conversion.
    $head = (string)self::substr($str, 0, 1, $encoding);
    $upperHead = self::strtoupper($head, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $upperHead . $tail;
  }
10536
10537
  /**
10538
   * alias for "UTF8::ucfirst()"
10539
   *
10540
   * @see UTF8::ucfirst()
10541
   *
10542
   * @param string $str
10543
   * @param string $encoding
10544
   * @param bool   $cleanUtf8
10545
   *
10546
   * @return string
10547
   */
10548 1
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all the work is done by "UTF8::ucfirst()"
    // (language and keep-length options stay at their defaults).
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
10552
10553
  /**
10554
   * Uppercase for all words in the string.
10555
   *
10556
   * @param string   $str        <p>The input string.</p>
10557
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
10558
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
10559
   * @param string   $encoding   [optional] <p>Set the charset.</p>
10560
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
10561
   *
10562
   * @return string
10563
   */
10564 9
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Only a really empty string short-circuits: "0" is falsy in PHP,
    // but it is valid input and must be returned unchanged.
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE doesn't only uppercase the first letter, it also lowercases all other letters

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // The native function is only usable if neither extra word-chars nor exceptions
    // were given. Compare against '' instead of casting to bool, otherwise a
    // charlist / exception of "0" would be ignored.
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // Skip empty tokens only; a "0" token is real content and has to stay in the result.
      if ('' === (string)$word) {
        continue;
      }

      // Words listed in "$exceptions" (strict comparison) are kept as they are.
      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    // The tokens are glued together without any separator.
    return \implode('', $newWords);
  }
10621
10622
  /**
10623
   * Multi decode html entity & fix urlencoded-win1252-chars.
10624
   *
10625
   * e.g:
10626
   * 'test+test'                     => 'test test'
10627
   * 'D&#252;sseldorf'               => 'Düsseldorf'
10628
   * 'D%FCsseldorf'                  => 'Düsseldorf'
10629
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
10630
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
10631
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
10632
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
10633
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
10634
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
10635
   *
10636
   * @param string $str          <p>The input string.</p>
10637
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
10638
   *
10639
   * @return string
10640
   */
10641 2
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" sequences are rewritten as hexadecimal HTML entities ("&#xXXXX;"),
    // so that the entity decoding in the loop below takes care of them.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    // Decode at least once; with "$multi_decode" keep going until a pass
    // does not change the string anymore.
    while (true) {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      if ($multi_decode !== true || $previous === $str) {
        break;
      }
    }

    return $str;
  }
10670
10671
  /**
10672
   * Return an array with "urlencoded"-win1252 -> UTF-8
10673
   *
10674
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
10675
   *
10676
   * @return string[]
10677
   */
10678 2
  public static function urldecode_fix_win1252_chars(): array
10679
  {
10680
    return [
10681 2
        '%20' => ' ',
10682
        '%21' => '!',
10683
        '%22' => '"',
10684
        '%23' => '#',
10685
        '%24' => '$',
10686
        '%25' => '%',
10687
        '%26' => '&',
10688
        '%27' => "'",
10689
        '%28' => '(',
10690
        '%29' => ')',
10691
        '%2A' => '*',
10692
        '%2B' => '+',
10693
        '%2C' => ',',
10694
        '%2D' => '-',
10695
        '%2E' => '.',
10696
        '%2F' => '/',
10697
        '%30' => '0',
10698
        '%31' => '1',
10699
        '%32' => '2',
10700
        '%33' => '3',
10701
        '%34' => '4',
10702
        '%35' => '5',
10703
        '%36' => '6',
10704
        '%37' => '7',
10705
        '%38' => '8',
10706
        '%39' => '9',
10707
        '%3A' => ':',
10708
        '%3B' => ';',
10709
        '%3C' => '<',
10710
        '%3D' => '=',
10711
        '%3E' => '>',
10712
        '%3F' => '?',
10713
        '%40' => '@',
10714
        '%41' => 'A',
10715
        '%42' => 'B',
10716
        '%43' => 'C',
10717
        '%44' => 'D',
10718
        '%45' => 'E',
10719
        '%46' => 'F',
10720
        '%47' => 'G',
10721
        '%48' => 'H',
10722
        '%49' => 'I',
10723
        '%4A' => 'J',
10724
        '%4B' => 'K',
10725
        '%4C' => 'L',
10726
        '%4D' => 'M',
10727
        '%4E' => 'N',
10728
        '%4F' => 'O',
10729
        '%50' => 'P',
10730
        '%51' => 'Q',
10731
        '%52' => 'R',
10732
        '%53' => 'S',
10733
        '%54' => 'T',
10734
        '%55' => 'U',
10735
        '%56' => 'V',
10736
        '%57' => 'W',
10737
        '%58' => 'X',
10738
        '%59' => 'Y',
10739
        '%5A' => 'Z',
10740
        '%5B' => '[',
10741
        '%5C' => '\\',
10742
        '%5D' => ']',
10743
        '%5E' => '^',
10744
        '%5F' => '_',
10745
        '%60' => '`',
10746
        '%61' => 'a',
10747
        '%62' => 'b',
10748
        '%63' => 'c',
10749
        '%64' => 'd',
10750
        '%65' => 'e',
10751
        '%66' => 'f',
10752
        '%67' => 'g',
10753
        '%68' => 'h',
10754
        '%69' => 'i',
10755
        '%6A' => 'j',
10756
        '%6B' => 'k',
10757
        '%6C' => 'l',
10758
        '%6D' => 'm',
10759
        '%6E' => 'n',
10760
        '%6F' => 'o',
10761
        '%70' => 'p',
10762
        '%71' => 'q',
10763
        '%72' => 'r',
10764
        '%73' => 's',
10765
        '%74' => 't',
10766
        '%75' => 'u',
10767
        '%76' => 'v',
10768
        '%77' => 'w',
10769
        '%78' => 'x',
10770
        '%79' => 'y',
10771
        '%7A' => 'z',
10772
        '%7B' => '{',
10773
        '%7C' => '|',
10774
        '%7D' => '}',
10775
        '%7E' => '~',
10776
        '%7F' => '',
10777
        '%80' => '`',
10778
        '%81' => '',
10779
        '%82' => '‚',
10780
        '%83' => 'ƒ',
10781
        '%84' => '„',
10782
        '%85' => '…',
10783
        '%86' => '†',
10784
        '%87' => '‡',
10785
        '%88' => 'ˆ',
10786
        '%89' => '‰',
10787
        '%8A' => 'Š',
10788
        '%8B' => '‹',
10789
        '%8C' => 'Œ',
10790
        '%8D' => '',
10791
        '%8E' => 'Ž',
10792
        '%8F' => '',
10793
        '%90' => '',
10794
        '%91' => '‘',
10795
        '%92' => '’',
10796
        '%93' => '“',
10797
        '%94' => '”',
10798
        '%95' => '•',
10799
        '%96' => '–',
10800
        '%97' => '—',
10801
        '%98' => '˜',
10802
        '%99' => '™',
10803
        '%9A' => 'š',
10804
        '%9B' => '›',
10805
        '%9C' => 'œ',
10806
        '%9D' => '',
10807
        '%9E' => 'ž',
10808
        '%9F' => 'Ÿ',
10809
        '%A0' => '',
10810
        '%A1' => '¡',
10811
        '%A2' => '¢',
10812
        '%A3' => '£',
10813
        '%A4' => '¤',
10814
        '%A5' => '¥',
10815
        '%A6' => '¦',
10816
        '%A7' => '§',
10817
        '%A8' => '¨',
10818
        '%A9' => '©',
10819
        '%AA' => 'ª',
10820
        '%AB' => '«',
10821
        '%AC' => '¬',
10822
        '%AD' => '',
10823
        '%AE' => '®',
10824
        '%AF' => '¯',
10825
        '%B0' => '°',
10826
        '%B1' => '±',
10827
        '%B2' => '²',
10828
        '%B3' => '³',
10829
        '%B4' => '´',
10830
        '%B5' => 'µ',
10831
        '%B6' => '¶',
10832
        '%B7' => '·',
10833
        '%B8' => '¸',
10834
        '%B9' => '¹',
10835
        '%BA' => 'º',
10836
        '%BB' => '»',
10837
        '%BC' => '¼',
10838
        '%BD' => '½',
10839
        '%BE' => '¾',
10840
        '%BF' => '¿',
10841
        '%C0' => 'À',
10842
        '%C1' => 'Á',
10843
        '%C2' => 'Â',
10844
        '%C3' => 'Ã',
10845
        '%C4' => 'Ä',
10846
        '%C5' => 'Å',
10847
        '%C6' => 'Æ',
10848
        '%C7' => 'Ç',
10849
        '%C8' => 'È',
10850
        '%C9' => 'É',
10851
        '%CA' => 'Ê',
10852
        '%CB' => 'Ë',
10853
        '%CC' => 'Ì',
10854
        '%CD' => 'Í',
10855
        '%CE' => 'Î',
10856
        '%CF' => 'Ï',
10857
        '%D0' => 'Ð',
10858
        '%D1' => 'Ñ',
10859
        '%D2' => 'Ò',
10860
        '%D3' => 'Ó',
10861
        '%D4' => 'Ô',
10862
        '%D5' => 'Õ',
10863
        '%D6' => 'Ö',
10864
        '%D7' => '×',
10865
        '%D8' => 'Ø',
10866
        '%D9' => 'Ù',
10867
        '%DA' => 'Ú',
10868
        '%DB' => 'Û',
10869
        '%DC' => 'Ü',
10870
        '%DD' => 'Ý',
10871
        '%DE' => 'Þ',
10872
        '%DF' => 'ß',
10873
        '%E0' => 'à',
10874
        '%E1' => 'á',
10875
        '%E2' => 'â',
10876
        '%E3' => 'ã',
10877
        '%E4' => 'ä',
10878
        '%E5' => 'å',
10879
        '%E6' => 'æ',
10880
        '%E7' => 'ç',
10881
        '%E8' => 'è',
10882
        '%E9' => 'é',
10883
        '%EA' => 'ê',
10884
        '%EB' => 'ë',
10885
        '%EC' => 'ì',
10886
        '%ED' => 'í',
10887
        '%EE' => 'î',
10888
        '%EF' => 'ï',
10889
        '%F0' => 'ð',
10890
        '%F1' => 'ñ',
10891
        '%F2' => 'ò',
10892
        '%F3' => 'ó',
10893
        '%F4' => 'ô',
10894
        '%F5' => 'õ',
10895
        '%F6' => 'ö',
10896
        '%F7' => '÷',
10897
        '%F8' => 'ø',
10898
        '%F9' => 'ù',
10899
        '%FA' => 'ú',
10900
        '%FB' => 'û',
10901
        '%FC' => 'ü',
10902
        '%FD' => 'ý',
10903
        '%FE' => 'þ',
10904
        '%FF' => 'ÿ',
10905
    ];
10906
  }
10907
10908
  /**
10909
   * Decodes an UTF-8 string to ISO-8859-1.
10910
   *
10911
   * @param string $str <p>The input string.</p>
10912
   * @param bool   $keepUtf8Chars
10913
   *
10914
   * @return string
10915
   */
10916 14
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
10917
  {
10918 14
    if ('' === $str) {
10919 5
      return '';
10920
    }
10921
10922 14
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
10923 14
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;
10924
10925 14
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
10926
10927 1
      if (self::$WIN1252_TO_UTF8 === null) {
10928
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type array|null. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10929
      }
10930
10931 1
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10931
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10932 1
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10932
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10933
    }
10934
10935
    /** @noinspection PhpInternalEntityUsedInspection */
10936 14
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);
10937
10938 14
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
10939
      self::checkForSupport();
10940
    }
10941
10942
    // save for later comparison
10943 14
    $str_backup = $str;
10944 14
    $len = self::strlen_in_byte($str);
10945
10946 14
    if (self::$ORD === null) {
10947
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type array|null. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10948
    }
10949
10950 14
    if (self::$CHR === null) {
10951
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type array|null. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10952
    }
10953
10954 14
    $noCharFound = '?';
10955
    /** @noinspection ForeachInvariantsInspection */
10956 14
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
10957 14
      switch ($str[$i] & "\xF0") {
10958 14
        case "\xC0":
10959 12
        case "\xD0":
10960 14
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
10961 14
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
10962 14
          break;
10963
10964
        /** @noinspection PhpMissingBreakStatementInspection */
10965 12
        case "\xF0":
10966
          ++$i;
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment if this fall-through is intended.
Loading history...
10967 12
        case "\xE0":
10968 10
          $str[$j] = $noCharFound;
10969 10
          $i += 2;
10970 10
          break;
10971
10972
        default:
10973 12
          $str[$j] = $str[$i];
10974
      }
10975
    }
10976
10977 14
    $return = self::substr_in_byte($str, 0, $j);
10978 14
    if ($return === false) {
0 ignored issues
show
introduced by
The condition $return === false is always false.
Loading history...
10979
      $return = '';
10980
    }
10981
10982
    if (
10983 14
        $keepUtf8Chars === true
10984
        &&
10985 14
        self::strlen($return) >= self::strlen($str_backup)
10986
    ) {
10987 2
      return $str_backup;
10988
    }
10989
10990 14
    return $return;
10991
  }
10992
10993
  /**
10994
   * Encodes an ISO-8859-1 string to UTF-8.
10995
   *
10996
   * @param string $str <p>The input string.</p>
10997
   *
10998
   * @return string
10999
   */
11000 14
  public static function utf8_encode(string $str): string
11001
  {
11002 14
    if ('' === $str) {
11003 13
      return '';
11004
    }
11005
11006 14
    $str = \utf8_encode($str);
11007
11008
    // the polyfill maybe return false
11009
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
11010 14
    if ($str === false) {
11011
      return '';
11012
    }
11013
11014 14
    if (false === \strpos($str, "\xC2")) {
11015 6
      return $str;
11016
    }
11017
11018 12
    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
11019 12
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;
11020
11021 12
    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {
11022
11023 1
      if (self::$WIN1252_TO_UTF8 === null) {
11024
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type array|null. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
11025
      }
11026
11027 1
      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

11027
      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
11028 1
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

11028
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
11029
    }
11030
11031 12
    return \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $str);
11032
  }
11033
11034
  /**
11035
   * fix -> utf8-win1252 chars
11036
   *
11037
   * @param string $str <p>The input string.</p>
11038
   *
11039
   * @return string
11040
   *
11041
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
11042
   */
11043 2
  public static function utf8_fix_win1252_chars(string $str): string
  {
    // Deprecated alias: the input is handed over unchanged to "UTF8::fix_simple_utf8()".
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
11047
11048
  /**
11049
   * Returns an array with all utf8 whitespace characters.
11050
   *
11051
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
11052
   *
11053
   * @author: Derek E. [email protected]
11054
   *
11055
   * @return string[]
11056
   *                 An array with all known whitespace characters as values and the type of whitespace as keys
11057
   *                 as defined in above URL.
11058
   */
11059 2
  public static function whitespace_table(): array
  {
    // Read-only accessor for the static whitespace lookup table of this class.
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
11063
11064
  /**
11065
   * Limit the number of words in a string.
11066
   *
11067
   * @param string $str      <p>The input string.</p>
11068
   * @param int    $limit    <p>The limit of words as integer.</p>
11069
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
11070
   *
11071
   * @return string
11072
   */
11073 2
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    // No input or a non-positive limit: there is nothing to return.
    if ('' === $str || $limit < 1) {
      return '';
    }

    // Match optional leading whitespace followed by 1..$limit "word + trailing whitespace" groups.
    $wordsPattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($wordsPattern, $str, $found);

    // No match, or the match already spans the whole string: return the input untouched.
    $head = $found[0] ?? null;
    if ($head === null || self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    // The string was cut: drop trailing whitespace and append the add-on.
    return self::rtrim($head) . $strAddOn;
  }
11095
11096
  /**
11097
   * Wraps a string to a given number of characters
11098
   *
11099
   * @link  http://php.net/manual/en/function.wordwrap.php
11100
   *
11101
   * @param string $str   <p>The input string.</p>
11102
   * @param int    $width [optional] <p>The column width.</p>
11103
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
11104
   * @param bool   $cut   [optional] <p>
11105
   *                      If the cut is set to true, the string is
11106
   *                      always wrapped at or before the specified width. So if you have
11107
   *                      a word that is larger than the given width, it is broken apart.
11108
   *                      </p>
11109
   *
11110
   * @return string The given string wrapped at the specified column.
11111
   */
11112 10
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $break || '' === $str) {
      return '';
    }

    $segments = \explode($break, $str);
    if ($segments === false) {
      $segments = [];
    }

    // "$glyphs" collects the characters of the input (every already existing break is one entry),
    // "$mask" gets exactly one byte per entry: '#' for a break, ' ' for a space, '?' for anything else.
    // The native byte-based "wordwrap()" is then applied to the mask instead of the real string.
    $glyphs = [];
    $mask = '';
    foreach ($segments as $segmentIndex => $segment) {
      if ($segmentIndex) {
        $glyphs[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $mask .= ' ' === $glyph ? ' ' : '?';
      }
    }

    $mask = \wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $glyphIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    // Walk over every '#' in the wrapped mask and rebuild the real string alongside.
    while (true) {
      $breakPos = self::strpos($mask, '#', $breakPos + 1);
      if (false === $breakPos) {
        break;
      }

      // Move all characters in front of this break into the result.
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $glyphs[$glyphIndex];
        unset($glyphs[$glyphIndex++]);
      }

      // An original break or a space at the break position is dropped,
      // the break itself is appended right below.
      if ($break === $glyphs[$glyphIndex] || ' ' === $glyphs[$glyphIndex]) {
        unset($glyphs[$glyphIndex++]);
      }

      $wrapped .= $break;
    }

    // Whatever is left behind the last break is appended unchanged.
    return $wrapped . \implode('', $glyphs);
  }
11166
11167
  /**
11168
   * Line-Wrap the string after $limit, but also after the next word.
11169
   *
11170
   * @param string $str
11171
   * @param int    $limit
11172
   *
11173
   * @return string
11174
   */
11175 1
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    // Split the input at every line ending ("\r\n", "\r" or "\n").
    $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
    if ($lines === false) {
      // the regex engine failed, there is nothing to wrap
      return '';
    }

    $wrapped = '';
    foreach ($lines as $line) {
      // Use the UTF-8 aware "UTF8::wordwrap()" of this class: the native "wordwrap()"
      // counts bytes, so lines with multi-byte characters would be wrapped too early.
      $wrapped .= self::wordwrap($line, $limit);

      // every line (including the last one) is terminated with "\n"
      $wrapped .= "\n";
    }

    return $wrapped;
  }
11191
11192
  /**
11193
   * Returns an array of Unicode White Space characters.
11194
   *
11195
   * @return string[] An array with numeric code point as key and White Space Character as value.
11196
   */
11197 2
  public static function ws(): array
  {
    // Read-only accessor for the static list of Unicode white space characters of this class.
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
11201
11202
11203
}
11204