Passed
Push — master ( 4704e6...a9ce73 )
by Lars
03:42
created

UTF8   F

Complexity

Total Complexity 1568

Size/Duplication

Total Lines 11299
Duplicated Lines 0 %

Test Coverage

Coverage 84.05%

Importance

Changes 0
Metric Value
eloc 3841
dl 0
loc 11299
ccs 2820
cts 3355
cp 0.8405
rs 0.8
c 0
b 0
f 0
wmc 1568

290 Methods

Rating   Name   Duplication   Size   Complexity  
A chr_to_decimal() 0 30 6
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 22 5
A count_chars() 0 3 1
A ctype_loaded() 0 3 1
F chr() 0 88 21
A apply_padding() 0 19 5
A chr_to_int() 0 3 1
A decode_mimeheader() 0 19 6
A chunk_split() 0 3 1
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A __construct() 0 3 1
A decimal_to_chr() 0 3 1
A between() 0 24 5
A codepoints() 0 29 4
A chr_map() 0 5 1
A cleanup() 0 28 2
A char_at() 0 3 1
A chars() 0 3 1
A checkForSupport() 0 42 4
A chr_to_hex() 0 11 3
A collapse_whitespace() 0 4 1
A access() 0 11 3
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A chr_size_list() 0 22 4
A file_has_bom() 0 8 2
A filter_input() 0 9 2
C filter() 0 53 13
A getData() 0 9 2
A fix_utf8() 0 20 4
A first_char() 0 12 3
D getCharDirection() 0 114 119
A filter_var_array() 0 9 2
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A filter_var() 0 9 2
A fix_simple_utf8() 0 20 4
A fixStrCaseHelper() 0 34 5
A filter_input_array() 0 9 2
A getSupportInfo() 0 15 4
A encode_mimeheader() 0 30 5
D extract_text() 0 109 20
F encode() 0 132 39
B file_get_contents() 0 58 10
A max() 0 14 3
A parse_str() 0 20 5
A str_contains() 0 16 6
A get_unique_string() 0 15 2
A is_bom() 0 9 3
A is_hexadecimal() 0 3 1
A str_isubstr_last() 0 16 4
A has_uppercase() 0 3 1
A remove_left() 0 13 2
A str_offset_exists() 0 10 2
A str_iends_with() 0 11 4
A max_chr_width() 0 8 2
A isBinary() 0 3 1
A ltrim() 0 15 4
D is_utf8() 0 148 32
A remove_html() 0 3 1
A str_longest_common_suffix() 0 16 3
A lcword() 0 9 1
A str_pad_both() 0 5 1
A str_index_last() 0 7 1
A mbstring_loaded() 0 9 3
A str_limit() 0 15 4
A html_escape() 0 6 1
C normalize_encoding() 0 132 14
B rxClass() 0 39 8
C get_file_type() 0 87 12
A str_ensure_right() 0 7 2
C is_utf16() 0 63 16
A isHtml() 0 3 1
A normalize_whitespace() 0 31 6
A isBase64() 0 3 1
A str_humanize() 0 15 1
A is_html() 0 12 2
A html_decode() 0 3 1
A isUtf32() 0 3 1
A str_index_first() 0 7 1
A rtrim() 0 15 4
B str_longest_common_substring() 0 40 8
A regex_replace() 0 18 3
A str_iindex_first() 0 7 1
A str_isubstr_before_first_separator() 0 16 4
A replace_all() 0 7 2
A removeBOM() 0 3 1
A str_matches_pattern() 0 7 2
A is_alpha() 0 3 1
A get_random_string() 0 25 4
A str_pad_right() 0 3 1
A isUtf8() 0 3 1
A is_serialized() 0 11 3
A is_uppercase() 0 3 1
B str_pad() 0 56 11
A str_ireplace() 0 18 3
B str_contains_all() 0 22 7
A is_ascii() 0 7 2
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 20 4
B range() 0 46 11
A rawurldecode() 0 28 4
B str_capitalize_name_helper() 0 78 10
A normalize_msword() 0 20 4
C str_detect_encoding() 0 116 14
A spaces_to_tabs() 0 3 1
A str_istarts_with() 0 11 4
A is_blank() 0 3 1
A str_replace() 0 3 1
A htmlspecialchars() 0 7 3
A replace() 0 7 2
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 16 4
A lowerCaseFirst() 0 9 1
A str_ends_with_any() 0 13 4
A remove_right() 0 12 2
A remove_html_breaks() 0 3 1
A showSupport() 0 11 3
A remove_invisible_characters() 0 19 3
A single_chr_html_encode() 0 19 6
A str_iindex_last() 0 7 1
C is_binary() 0 48 12
A intlChar_loaded() 0 3 1
A lcfirst() 0 16 2
A str_ends_with() 0 7 3
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A reduce_string_array() 0 26 6
A str_longest_common_prefix() 0 16 3
A str_pad_left() 0 3 1
A html_stripe_empty_tags() 0 6 1
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A str_iends_with_any() 0 13 4
A str_isubstr_after_first_separator() 0 20 4
A json_loaded() 0 3 1
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 7 2
A str_offset_get() 0 14 4
A hasBom() 0 3 1
A str_capitalize_name() 0 8 1
A str_limit_after_word() 0 30 6
A iconv_loaded() 0 3 2
B lcwords() 0 45 8
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A is_empty() 0 3 1
A str_dasherize() 0 3 1
A isUtf16() 0 3 1
A str_ensure_left() 0 7 2
C is_utf32() 0 63 16
F ord() 0 64 17
A is_alphanumeric() 0 3 1
A json_decode() 0 16 3
A is_json() 0 24 6
A int_to_hex() 0 7 2
A has_lowercase() 0 3 1
A json_encode() 0 16 3
A str_isubstr_first() 0 21 4
A is_base64() 0 13 4
A str_last_char() 0 9 3
A hex_to_int() 0 14 3
A htmlentities() 0 19 3
A hex_to_chr() 0 3 1
A isJson() 0 3 1
A str_insert() 0 12 2
B replace_diamond_question_mark() 0 45 7
A str_delimit() 0 16 1
A min() 0 14 3
A str_istarts_with_any() 0 17 5
A str_contains_any() 0 13 4
A remove_duplicates() 0 14 4
A str_substr_after_first_separator() 0 20 4
A str_camelize() 0 26 2
B str_to_lines() 0 27 7
B substr_in_byte() 0 26 7
A strnatcasecmp() 0 5 1
A substr_left() 0 19 5
F strlen() 0 99 21
A str_replace_beginning() 0 21 6
C stripos() 0 57 15
F strrchr() 0 92 20
A to_filename() 0 24 2
F utf8_decode() 0 75 16
C wordwrap() 0 53 13
A ucfirst() 0 22 3
A str_substr_last() 0 16 4
A toUTF8() 0 3 1
A string() 0 10 1
B str_titleize_for_humans() 0 127 5
A str_starts_with() 0 11 4
C substr_count_in_byte() 0 51 15
A strchr() 0 3 1
A strichr() 0 3 1
A strlen_in_byte() 0 16 4
A str_ireplace_ending() 0 21 6
A titlecase() 0 7 3
B strtolower() 0 48 10
A urldecode() 0 28 4
A strrev() 0 13 3
F substr_replace() 0 107 26
A strstr_in_byte() 0 16 5
A str_titleize() 0 38 5
A ws() 0 3 1
A str_replace_first() 0 8 2
A toLatin1() 0 3 1
B ucwords() 0 56 11
A to_boolean() 0 33 4
D stristr() 0 63 18
A strncasecmp() 0 6 1
B strwidth() 0 40 8
A trim() 0 15 4
A str_upper_camelize() 0 3 1
A substr_compare() 0 25 6
F substr_count() 0 74 19
A strnatcmp() 0 3 2
A to_latin1() 0 3 1
A str_replace_ending() 0 21 6
A string_has_bom() 0 9 3
B strtr() 0 30 7
B strspn() 0 15 7
A strcasecmp() 0 5 1
A str_transliterate() 0 3 1
A utf8_encode() 0 32 6
A substr_iright() 0 19 5
A to_iso8859() 0 16 4
A words_limit() 0 21 5
A strip_tags() 0 11 3
A str_truncate_safe() 0 29 6
A substr_right() 0 19 5
A str_split() 0 3 1
A strrpos_in_byte() 0 16 5
F strrpos() 0 116 27
A str_replace_last() 0 8 2
A str_substr_before_last_separator() 0 20 4
A strtocasefold() 0 20 3
A tabs_to_spaces() 0 3 1
A str_truncate() 0 20 3
F strripos() 0 91 20
A strpos_in_byte() 0 16 5
F to_ascii() 0 150 28
A mbstring_overloaded() 0 11 2
A str_substr_first() 0 21 4
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 17 6
A str_substr_after_last_separator() 0 20 4
A str_snakeize() 0 38 2
A str_sort() 0 15 3
F to_utf8() 0 93 32
A ucword() 0 3 1
A str_underscored() 0 3 1
A strip_whitespace() 0 7 2
A toAscii() 0 3 1
A str_upper_first() 0 3 1
A swapCase() 0 17 5
A substr_ileft() 0 19 5
B html_encode() 0 39 7
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
A strtonatfold() 0 4 1
B strcspn() 0 23 7
B str_split_pattern() 0 31 7
F strstr() 0 87 19
A str_ireplace_beginning() 0 21 6
A str_substr_before_first_separator() 0 20 4
F substr() 0 140 31
A wordwrap_per_line() 0 15 3
A str_surround() 0 3 1
A strncmp() 0 6 1
A utf8_fix_win1252_chars() 0 3 1
A to_utf8_convert_helper() 0 27 5
B strtoupper() 0 48 10
D html_entity_decode() 0 87 18
A str_starts_with_any() 0 17 5
B strrichr() 0 49 11
D split() 0 122 27
A str_slice() 0 13 5
F strpos() 0 134 31
A str_shuffle() 0 12 2
A strcmp() 0 6 2
A str_word_count() 0 29 5
A strripos_in_byte() 0 16 5
A str_to_binary() 0 5 1
B symfony_polyfill_used() 0 16 7
B str_to_words() 0 33 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
   * Byte-order-mark lookup table: BOM byte sequence => length of that sequence in bytes.
   *
   * Every BOM is listed twice: once as its raw bytes and once in the form it takes after
   * the raw bytes were read as "WINDOWS-1252" (each resulting character may then occupy
   * more than one byte, which is why those entries are longer).
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // written as byte escapes ("ï»¿") so the key cannot be silently stripped by editors or converters
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      // NOTE(review): the two leading characters are spaces here; presumably they mirror the NUL bytes above - confirm
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      // NOTE(review): same question for the two trailing characters - confirm
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'upper' => [
157
          'µ',
158
          'ſ',
159
          "\xCD\x85",
160
          'ς',
161
          'ẞ',
162
          "\xCF\x90",
163
          "\xCF\x91",
164
          "\xCF\x95",
165
          "\xCF\x96",
166
          "\xCF\xB0",
167
          "\xCF\xB1",
168
          "\xCF\xB5",
169
          "\xE1\xBA\x9B",
170
          "\xE1\xBE\xBE",
171
      ],
172
      'lower' => [
173
          'μ',
174
          's',
175
          'ι',
176
          'σ',
177
          'ß',
178
          'β',
179
          'θ',
180
          'φ',
181
          'π',
182
          'κ',
183
          'ρ',
184
          'ε',
185
          "\xE1\xB9\xA1",
186
          'ι',
187
      ],
188
  ];
189
190
191
  /**
192
   * @var array
193
   */
194
  private static $SUPPORT = [];
195
196
  /**
197
   * @var null|array
198
   */
199
  private static $UTF8_MSWORD;
200
201
  /**
202
   * @var null|array
203
   */
204
  private static $BROKEN_UTF8_FIX;
205
206
  /**
207
   * @var null|array
208
   */
209
  private static $WIN1252_TO_UTF8;
210
211
  /**
212
   * @var null|array
213
   */
214
  private static $ENCODINGS;
215
216
  /**
217
   * @var null|array
218
   */
219
  private static $ORD;
220
221
  /**
222
   * @var null|array
223
   */
224
  private static $CHR;
225
226
  /**
   * Creating an instance only makes sure the environment detection has been executed.
   */
  public function __construct()
  {
    // no instance state is set up here; the detection result lives in static storage
    self::checkForSupport();
  }
233
234
  /**
   * Fetches one character by its position, similar to $str[1] on a single-byte string.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>Zero-based position of the wanted character.</p>
   *
   * @return string The character at $pos, or an empty string for empty input or a negative position.
   */
  public static function access(string $str, int $pos): string
  {
    // nothing to look up in an empty string, and negative positions are not supported
    if ('' === $str || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
254
255
  /**
   * Makes sure the string starts with the UTF-8 BOM.
   *
   * INFO: A string that already has a BOM is handed back untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string, prefixed with the UTF-8 BOM when none was detected.
   */
  public static function add_bom_to_string(string $str): string
  {
    // anything but an explicit "false" from the detection means: leave the input alone
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
272
273
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The target length is always "$left + $right + length of $str"; the two amounts only
   * decide on which side(s) self::str_pad() places the padding.
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding now carries the documented default: a required parameter behind optional
    // ones is deprecated since PHP 8.0 and made the defaults before it unusable anyway
    $length = ($left + $right) + self::strlen($str, $encoding);

    if ($left && !$right) {
      $type = STR_PAD_LEFT;
    } elseif ($right && !$left) {
      $type = STR_PAD_RIGHT;
    } else {
      // both sides requested, or no padding at all
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
305
306
  /**
   * Returns a copy of the array whose keys have been lower- or upper-cased.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other value is
   *                     handled like <strong>CASE_LOWER</strong>.</p>
   *
   * @return array The values of the input array under their case-converted keys.
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // only an explicit CASE_UPPER upper-cases, everything else falls back to lower-casing
    $toUpper = ($case === CASE_UPPER);

    $converted = [];
    foreach ($array as $name => $item) {
      $name = $toUpper ? self::strtoupper($name) : self::strtolower($name);

      // keys that collapse to the same spelling overwrite each other, the last one wins
      $converted[$name] = $item;
    }

    return $converted;
  }
338
339
  /**
   * Extracts the text enclosed by the first $start delimiter (searched from $offset on)
   * and the next $end delimiter after it.
   *
   * An empty string is returned when either delimiter is missing or when nothing lies
   * between the two.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startAt = self::strpos($str, $start, $offset, $encoding);
    if ($startAt === false) {
      return '';
    }

    // first character behind the start delimiter
    $from = $startAt + self::strlen($start, $encoding);

    $until = self::strpos($str, $end, $from, $encoding);
    if ($until === false || $until === $from) {
      // no end delimiter, or the delimiters are directly adjacent
      return '';
    }

    $slice = self::substr($str, $from, $until - $from, $encoding);

    return $slice === false ? '' : $slice;
  }
377
378
  /**
   * Converts a string of binary digits ("0" / "1") into the bytes it represents.
   *
   * The input is treated as one big-endian number: leading zero bits are dropped and the
   * remaining bits are left-padded to whole bytes, so '1100001' and '01100001' both give "a".
   * Characters other than "0" and "1" are skipped.
   *
   * The conversion works byte by byte. A single base_convert() + pack('H*') round trip
   * loses precision on long inputs and misplaces the nibbles whenever the hex form has an
   * odd number of digits (e.g. '100000001' must yield "\x01\x01").
   *
   * @param mixed $bin <p>String consisting of the digits 1|0.</p>
   *
   * @return string The decoded bytes; an empty string for empty input or an input without any "1" bit.
   */
  public static function binary_to_str($bin): string
  {
    if (!isset($bin[0])) {
      return '';
    }

    // keep the digits only, then strip the leading zeros of the number
    $bits = \ltrim((string)\preg_replace('/[^01]/', '', (string)$bin), '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8 bits so that the input splits into whole bytes
    $missing = (8 - (\strlen($bits) % 8)) % 8;
    $bits = \str_repeat('0', $missing) . $bits;

    $str = '';
    foreach (\str_split($bits, 8) as $byte) {
      $str .= \chr((int)\bindec($byte));
    }

    return $str;
  }
398
399
  /**
   * Provides the three bytes of the UTF-8 Byte Order Mark (EF BB BF).
   *
   * INFO: the class-internal $BOM table lists further marks, e.g. for UTF-16 and UTF-32
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    return "\xef\xbb\xbf";
  }
410
411
  /**
   * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return string[]
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
425
426
  /**
   * Picks the single character found at $index (the first character has index 0).
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string The character at $index.
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    $char = self::substr($str, $index, 1, $encoding);

    // the cast turns a non-string result of substr() into a string
    return (string)$char;
  }
439
440
  /**
   * Breaks the string apart into its individual characters.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] An array of chars.
   */
  public static function chars(string $str): array
  {
    // a chunk length of 1 means: one array entry per character
    $chars = self::str_split($str, 1);

    return $chars;
  }
451
452
  /**
   * Probes the server environment once and stores the findings in self::$SUPPORT.
   *
   * INFO: There is no need to call this by hand; methods that depend on the result
   *       trigger it themselves while the "already checked" marker is missing.
   */
  public static function checkForSupport()
  {
    // the detection runs a single time, later calls return right away
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    // starts out empty, filled further down when "intl" can list its transliterators
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    if (self::$SUPPORT['intl'] === true && \function_exists('transliterator_list_ids')) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
501
502
  /**
   * Builds the character that belongs to the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are kept in a function-static cache, keyed by code point and encoding.
   * Code points up to 127 are read from the "chr" data table; higher ones are delegated
   * to \IntlChar::chr() when that is available and otherwise assembled byte by byte from
   * the same table. For every encoding other than UTF-8 the character is passed through
   * UTF8::encode() at the end.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null Multi-Byte character; NOTE(review): null presumably only shows up when a
   *                     delegate (the data table lookup or \IntlChar::chr()) yields it - confirm.
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // memo: "<code point><encoding>" => character
    static $cache = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "UTF-8" and "CP850" are used as given, every other name gets normalized first
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $needsMbstring = $encoding !== 'UTF-8'
                     && $encoding !== 'ISO-8859-1'
                     && $encoding !== 'WINDOWS-1252';
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $key = $code_point . $encoding;
    if (isset($cache[$key])) {
      return $cache[$key];
    }

    $convert = ($encoding !== 'UTF-8');

    // use "simple"-char only until "\x80"
    if ($code_point <= 127) {
      if (self::$CHR === null) {
        $table = self::getData('chr');
        if ($table) {
          self::$CHR = (array)$table;
        }
      }

      $char = self::$CHR[$code_point];
      if ($convert) {
        $char = self::encode($encoding, $char);
      }

      $cache[$key] = $char;

      return $char;
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $char = \IntlChar::chr($code_point);
      if ($convert) {
        $char = self::encode($encoding, $char);
      }

      $cache[$key] = $char;

      return $char;
    }

    // fallback: put the UTF-8 byte sequence together from the single-byte table
    if (self::$CHR === null) {
      $table = self::getData('chr');
      if ($table) {
        self::$CHR = (array)$table;
      }
    }

    $cp = (int)$code_point;
    if ($cp <= 0x7F) {
      $char = self::$CHR[$cp];
    } elseif ($cp <= 0x7FF) {
      // two bytes: 110xxxxx 10xxxxxx
      $char = self::$CHR[($cp >> 6) + 0xC0] .
              self::$CHR[($cp & 0x3F) + 0x80];
    } elseif ($cp <= 0xFFFF) {
      // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $char = self::$CHR[($cp >> 12) + 0xE0] .
              self::$CHR[(($cp >> 6) & 0x3F) + 0x80] .
              self::$CHR[($cp & 0x3F) + 0x80];
    } else {
      // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $char = self::$CHR[($cp >> 18) + 0xF0] .
              self::$CHR[(($cp >> 12) & 0x3F) + 0x80] .
              self::$CHR[(($cp >> 6) & 0x3F) + 0x80] .
              self::$CHR[($cp & 0x3F) + 0x80];
    }

    if ($convert) {
      $char = self::encode($encoding, $char);
    }

    $cache[$key] = $char;

    return $char;
  }
601
602
  /**
   * Runs the callback once for every character of the string and collects the results.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] The outcome of callback.
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
616
617
  /**
   * Lists, character by character, how many bytes each character of the string occupies.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] An array of byte lengths of each character.
   */
  public static function chr_size_list(string $str): array
  {
    if ('' === $str) {
      return [];
    }

    $chars = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \array_map('\strlen', $chars);
    }

    // with the overload flag set, the bytes are counted via UTF8::strlen_in_byte() instead of \strlen()
    return \array_map(
        static function ($char) {
          return self::strlen_in_byte($char);
        },
        $chars
    );
  }
652
653
  /**
   * Decodes the code point of the character at the start of $char.
   *
   * The lead byte decides how many bytes belong to the sequence (1 to 4); its payload
   * bits and those of the following continuation bytes are then joined, 6 bits per
   * continuation byte.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    $code = self::ord($char[0]);

    // 0xxxxxxx: the byte is the code point
    if (!($code & 0x80)) {
      return $code;
    }

    // a lead byte matching none of the patterns below keeps the length 1 and its raw value
    $length = 1;
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $code &= ~0xf0;
    }

    // 10xxxxxx: append the payload of every continuation byte
    for ($offset = 1; $offset < $length; ++$offset) {
      $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $code;
  }
691
692
  /**
   * Renders the code point of a UTF-8 encoded character in hexadecimal form (U+xxxx).
   *
   * @param string|int $char <p>The input character</p>
   * @param string     $pfix [optional] <p>Prefix handed on to UTF8::int_to_hex(). Default: 'U+'</p>
   *
   * @return string The code point encoded as U+xxxx, or an empty string for an empty input.
   */
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ('' === $char) {
      return '';
    }

    // the entity "&#0;" is looked up as an empty string
    $input = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($input), $pfix);
  }
712
713
  /**
   * Alias of UTF8::chr_to_decimal(): the character is forwarded unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
726
727
  /**
   * Cuts the string into chunks of at most $chunklen characters and glues them back
   * together with $end as separator.
   *
   * INFO: $end is only placed between two chunks, it is not appended after the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string The chunked string.
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
740
741
  /**
   * Strips every byte that is not part of a UTF-8 sequence and optionally applies further clean-up steps.
   *
   * @param string $str                           <p>The string that should be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>True, to strip the UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>True, to normalize the whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>True, to normalize MS Word chars
   *                                              e.g.: "…" => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>True, to keep non-breaking-spaces, only used in
   *                                              combination with $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>True, to remove the diamond question mark
   *                                              e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>False, to keep invisible characters
   *                                              e.g.: "\0"</p>
   *
   * @return string Clean UTF-8 encoded string.
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences, groups 2 and 3 capture single stray bytes.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // Every match is replaced by group 1 only, so the stray bytes of group 2 / 3 are dropped.
    $cleaned = (string)\preg_replace($pattern, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
809
810
  /**
   * Repairs simple UTF-8 encoding errors and then runs "UTF8::clean()" with a fixed set of options.
   *
   * @param string $str <p>The input, it is cast to a string before it is processed.</p>
   *
   * @return string
   */
  public static function cleanup($str): string
  {
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $fixed = self::fix_simple_utf8($input);

    return self::clean(
        $fixed,
        true,  // $remove_bom
        true,  // $normalize_whitespace
        false, // $normalize_msword
        true,  // $keep_non_breaking_space
        true,  // $replace_diamond_question_mark
        true   // $remove_invisible_characters
    );
  }
846
847
  /**
   * Converts a string (or an array of strings) into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.
   *                                    A string is split via "UTF8::split()" first.</p>
   * @param bool            $u_style    <p>True, to get the code points formatted by "UTF8::int_to_hex()",
   *                                    by default the result of "UTF8::ord()" is returned.</p>
   *
   * @return array<int|string>
   *                           The array of code points:<br>
   *                           array<int> for $u_style === false<br>
   *                           array<string> for $u_style === true<br>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    $chars = \is_string($arg) ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);
    if ($codePoints === []) {
      return [];
    }

    if (!$u_style) {
      return $codePoints;
    }

    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
891
892
  /**
   * Replaces every run of "[[:space:]]" characters with one single space and
   * trims the result afterwards.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The trimmed string with condensed whitespace.
   */
  public static function collapse_whitespace(string $str): string
  {
    $singleSpaced = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($singleSpaced);
  }
907
908
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed on to "UTF8::split()" to remove non UTF-8 chars.</p>
   *
   * @return int[] An associative array with the characters as keys and
   *               their number of occurrences as values.
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
  }
921
922
  /**
   * Removes css media-queries ("@media ... { ... }" blocks) from a string.
   *
   * @param string $str <p>The css input.</p>
   *
   * @return string The input without the matched media-query blocks.
   */
  public static function css_stripe_media_queries(string $str): string
  {
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';
    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    return (string)$stripped;
  }
937
938
  /**
   * Tells whether the "ctype" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise.
   */
  public static function ctype_loaded(): bool
  {
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
948
949
  /**
   * Converts an int-value into a UTF-8 character by decoding the numeric
   * HTML entity "&#<int>;".
   *
   * @param mixed $int <p>The value that is placed into the numeric entity.</p>
   *
   * @return string
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';
    $flags = ENT_QUOTES | ENT_HTML5;

    return self::html_entity_decode($entity, $flags);
  }
960
961
  /**
   * Decodes a MIME header field.
   *
   * Uses "iconv_mime_decode()" when the iconv support flag is set, otherwise
   * it falls back to "mb_decode_mimeheader()".
   *
   * @param string $str      <p>The MIME header field.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string|false
   *                      A decoded MIME field on success,
   *                      or false if an error occurs during the decoding.
   */
  public static function decode_mimeheader($str, $encoding = 'UTF-8')
  {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_mime_decode($str, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }

    // strict comparison, in line with the encoding check at the top of this method
    if ($encoding !== 'UTF-8') {
      $str = self::encode($encoding, $str);
    }

    return \mb_decode_mimeheader($str);
  }
991
992
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call it on a string that is already UTF-8 without messing it up.
   *
   * The pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES" are handled by dedicated branches,
   * every other conversion goes through "mb_convert_encoding()" and, as a fallback, "iconv()".
   *
   * @param string $toEncoding                  <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str                         <p>The input string</p>
   * @param bool   $autodetectFromEncoding      [optional] <p>True, to auto-detect the current string-encoding
   *                                            (a detected encoding overrides $fromEncoding).</p>
   * @param string $fromEncoding                [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
   *                                            An empty string will trigger the autodetect anyway.</p>
   *
   * @return string The converted string, or the unchanged input if no conversion was possible or needed.
   */
  public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
  {
    if ('' === $str || '' === $toEncoding) {
      return $str;
    }

    if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
      $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
    }

    if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
      $fromEncoding = self::normalize_encoding($fromEncoding, null);
    }

    if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
      return $str;
    }

    // The support flags are read further down, so make sure they are initialized (once is enough).
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($toEncoding === 'JSON') {
      return self::json_encode($str);
    }
    if ($fromEncoding === 'JSON') {
      $str = self::json_decode($str);
      $fromEncoding = '';
    }

    if ($toEncoding === 'BASE64') {
      return base64_encode($str);
    }
    if ($fromEncoding === 'BASE64') {
      $str = base64_decode($str);
      $fromEncoding = '';
    }

    if ($toEncoding === 'HTML-ENTITIES') {
      return self::html_encode($str, true, 'UTF-8');
    }
    if ($fromEncoding === 'HTML-ENTITIES') {
      $str = self::html_decode($str, ENT_COMPAT, 'UTF-8');
      $fromEncoding = '';
    }

    $fromEncodingDetected = false;
    if ($autodetectFromEncoding === true || !$fromEncoding) {
      $fromEncodingDetected = self::str_detect_encoding($str);
    }

    if ($fromEncodingDetected !== false) {
      $fromEncoding = $fromEncodingDetected;
    } elseif ($autodetectFromEncoding === true) {
      // fallback for the "autodetect"-mode (nothing was detected)
      return self::to_utf8($str);
    }

    if (!$fromEncoding || $fromEncoding === $toEncoding) {
      return $str;
    }

    if (
        $toEncoding === 'UTF-8'
        &&
        ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'ISO-8859-1')
    ) {
      return self::to_utf8($str);
    }

    if (
        $toEncoding === 'ISO-8859-1'
        &&
        ($fromEncoding === 'WINDOWS-1252' || $fromEncoding === 'UTF-8')
    ) {
      return self::to_iso8859($str);
    }

    if (
        $toEncoding !== 'UTF-8'
        &&
        $toEncoding !== 'ISO-8859-1'
        &&
        $toEncoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $strEncoded = \mb_convert_encoding(
          $str,
          $toEncoding,
          $fromEncoding
      );

      // explicit check instead of a truthiness test, so that a valid result like "0" is not discarded
      if ($strEncoded !== false && $strEncoded !== '') {
        return $strEncoded;
      }
    }

    // only fall back to iconv() if some implementation of it exists, otherwise the call would be fatal
    if (\function_exists('iconv')) {
      $return = \iconv($fromEncoding, $toEncoding, $str);
      if ($return !== false) {
        return $return;
      }
    }

    return $str;
  }
1141
1142
  /**
   * Encodes a string as a MIME header field via "iconv_mime_encode()" (with an empty field name).
   *
   * @param string $str              <p>The field value that should be encoded.</p>
   * @param string $fromCharset      [optional] <p>Set the input charset.</p>
   * @param string $toCharset        [optional] <p>Set the output charset.</p>
   * @param string $transferEncoding [optional] <p>Set the transfer encoding, used as "scheme".</p>
   * @param string $linefeed         [optional] <p>Set the used linefeed, used as "line-break-chars".</p>
   * @param int    $indent           [optional] <p>Set the max length indent, used as "line-length".</p>
   *
   * @return string|false
   *                      An encoded MIME field on success,
   *                      or false if an error occurs during the encoding.
   */
  public static function encode_mimeheader(
      $str,
      $fromCharset = 'UTF-8',
      $toCharset = 'UTF-8',
      $transferEncoding = 'Q',
      $linefeed = "\r\n",
      $indent = 76
  )
  {
    // 'UTF-8' and 'CP850' are used as they are, everything else gets normalized
    if (!($fromCharset === 'UTF-8' || $fromCharset === 'CP850')) {
      $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
    }

    if (!($toCharset === 'UTF-8' || $toCharset === 'CP850')) {
      $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
    }

    $preferences = [
        'scheme'           => $transferEncoding,
        'line-length'      => $indent,
        'input-charset'    => $fromCharset,
        'output-charset'   => $toCharset,
        'line-break-chars' => $linefeed,
    ];

    return \iconv_mime_encode('', $str, $preferences);
  }
1185
1186
  /**
   * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
   *
   * Without a search-string the extract is taken from the beginning of the string. The cut positions are
   * looked up via the next / previous ' ' or '.' and skipped text is marked with $replacerForSkippedText.
   *
   * @param string   $str                    <p>The input string.</p>
   * @param string   $search                 <p>The searched string.</p>
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
      $length = (int)\round(self::strlen($str, $encoding) / 2, 0);
    }

    if (empty($search)) {

      $stringLength = self::strlen($str, $encoding);

      if ($length > 0) {
        $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
      } else {
        $end = 0;
      }

      // NOTE(review): if one of the lookups returns false, min() presumably picks that false and $pos
      // becomes 0, so the whole string is returned - confirm that this is intended.
      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    $wordPos = self::stripos($str, $search, 0, $encoding);
    $halfSide = (int)($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    // NOTE(review): the loose check treats "found at position 0" like "not found" - kept as it is.
    if ($wordPos && $halfSide > 0) {
      $l = $pos_start + $length - 1;
      $realLength = self::strlen($str, $encoding);

      if ($l > $realLength) {
        $l = $realLength;
      }

      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if ($pos_end <= 0) {
        // no cut position behind the search-string: take the rest of the string,
        // the length is measured with the same $encoding as every other call in here
        $strSub = self::substr($str, $pos_start, $realLength, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
        } else {
          $extract = '';
        }
      } else {
        $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      }

    } else {

      $l = $length - 1;
      $trueLength = self::strlen($str, $encoding);

      if ($l > $trueLength) {
        $l = $trueLength;
      }

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $strSub = self::substr($str, 0, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1307
1308
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename             <p>
   *                                            Name of the file to read, it is passed through
   *                                            filter_var(..., FILTER_SANITIZE_STRING) first.
   *                                            </p>
   * @param bool          $use_include_path     [optional] <p>
   *                                            Passed on to the native file_get_contents() to trigger
   *                                            the include path search.
   *                                            </p>
   * @param resource|null $context              [optional] <p>
   *                                            A valid context resource created with
   *                                            stream_context_create. If it is null and $timeout is set,
   *                                            a http-context with that timeout is created.
   *                                            </p>
   * @param int|null      $offset               [optional] <p>
   *                                            The offset where the reading starts (null is treated as 0).
   *                                            </p>
   * @param int|null      $maxLength            [optional] <p>
   *                                            Maximum length of data read. The default is to read until end
   *                                            of file is reached.
   *                                            </p>
   * @param int           $timeout              <p>The time in seconds for the timeout.</p>
   *
   * @param bool          $convertToUtf8        <strong>WARNING!!!</strong> <p>Maybe you can't use this option for some
   *                                            files, because they used non default utf-8 chars. Binary files like
   *                                            images or pdf will not be converted.</p>
   * @param string        $fromEncoding         [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
   *                                            A empty string will trigger the autodetect anyway.</p>
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true,
      string $fromEncoding = ''
  )
  {
    // init
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - a replacement is needed sooner or later.
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // The filter can fail (false) or strip the whole name (''), there is no file to read in both cases.
    if ($filename === false || $filename === '') {
      return false;
    }

    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // binary data that is neither UTF-16 nor UTF-32 is returned untouched
      $isPlainBinary = self::is_binary($data, true) === true
                       &&
                       self::is_utf16($data, false) === false
                       &&
                       self::is_utf32($data, false) === false;

      if (!$isPlainBinary) {
        $data = self::encode('UTF-8', $data, false, $fromEncoding);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
1406
1407
  /**
   * Reads a file and checks its content via "UTF8::string_has_bom()".
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @throws \RuntimeException if file_get_contents() returned false
   *
   * @return bool
   *              The result of "UTF8::string_has_bom()" for the content of the file.
   */
  public static function file_has_bom(string $file_path): bool
  {
    $contents = \file_get_contents($file_path);

    if ($contents !== false) {
      return self::string_has_bom($contents);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1426
1427
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * every value that is not an array, an object or a string is returned unchanged.
   *
   * @param mixed  $var                <p>The value that should be filtered.</p>
   * @param int    $normalization_form <p>The form that is passed on to the "Normalizer" class.</p>
   * @param string $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    if (\is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (\is_object($var)) {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (!\is_string($var)) {
      return $var;
    }

    if (\strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // nothing else to do for pure ASCII strings
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty placeholder, so that the isset()-check below passes
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // use the normalized string if there is one, otherwise try to re-encode the input
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($normalized[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1490
1491
  /**
   * "filter_input()"-wrapper, the result is passed through "UTF8::filter()"
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Gets a specific external variable by name and optionally filters it
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              It is only forwarded if it was passed explicitly.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    // forward $options only if the caller really passed a fourth argument
    $var = (\func_num_args() < 4)
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1529
1530
  /**
   * "filter_input_array()"-wrapper, the result is passed through "UTF8::filter()"
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Gets external variables and optionally filters them
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are filter which specifies the filter type,
   *                          flags which specifies any flags that apply to the
   *                          filter, and options which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
   *               set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // forward $definition / $add_empty only if the caller passed more than the $type
    $values = (\func_num_args() < 2)
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1575
1576
  /**
   * Wrapper around the native "filter_var()" whose result is passed through "UTF8::filter()"
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Filters a variable with a specified filter.
   *
   * @link  http://php.net/manual/en/function.filter-var.php
   *
   * @param mixed $variable <p>
   *                        Value to filter.
   *                        </p>
   * @param int   $filter   [optional] <p>
   *                        The ID of the filter to apply. The manual page lists the available filters.
   *                        </p>
   * @param mixed $options  [optional] <p>
   *                        Associative array of options or bitwise disjunction of flags. If the filter
   *                        accepts options, flags can be provided in the "flags" field of the array. For
   *                        the "callback" filter a callable has to be passed; it must accept one argument
   *                        (the value to be filtered) and return the filtered / sanitized value.
   *                        </p>
   *                        <p>
   *                        <code>
   *                        // filter that accepts options
   *                        $options = array(
   *                        'options' => array('default' => 3, 'min_range' => 0),
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
   *                        );
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
   *                        // filter that only accepts flags
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
   *                        // callback filter
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
   *                        </code>
   *                        </p>
   *
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
   */
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Pass $options on only if the caller supplied it explicitly.
    $optionsWereGiven = \func_num_args() >= 3;

    $filtered = $optionsWereGiven
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    return self::filter($filtered);
  }
1644
1645
  /**
   * Wrapper around the native "filter_var_array()" whose result is passed through "UTF8::filter()"
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Gets multiple variables and optionally filters them.
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>
   *                          An array with string keys containing the data to filter.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          Either an array that maps a variable name to a filter type or to an array
   *                          with the keys "filter", "flags" and "options", or a single integer filter
   *                          constant that is then applied to every value of the input array.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
   *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
   */
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // With only $data given, let the native function apply its own defaults.
    $onlyDataGiven = \func_num_args() < 2;

    if ($onlyDataGiven) {
      return self::filter(\filter_var_array($data));
    }

    return self::filter(\filter_var_array($data, $definition, $add_empty));
  }
1687
1688
  /**
   * Tells whether the "finfo" class exists in the running PHP installation.
   *
   * @return bool
   *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise.
   */
  public static function finfo_loaded(): bool
  {
    $finfoAvailable = \class_exists('finfo');

    return $finfoAvailable;
  }
1698
1699
  /**
   * Returns the first $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to take from the start; values <= 0 give an empty string.</p>
   * @param string $encoding [optional] <p>Charset that is handed to "UTF8::substr()".</p>
   *
   * @return string The leading part of $str, or an empty string if "UTF8::substr()" reports <b>false</b>.
   */
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n > 0) {
      $prefix = self::substr($str, 0, $n, $encoding);

      if ($prefix !== false) {
        return $prefix;
      }
    }

    // nothing requested, or the substring could not be extracted
    return '';
  }
1721
1722
  /**
   * Checks that the length of the string, as reported by "UTF8::strlen()", does not exceed $box_size.
   *
   * @param string $str      The original string to be checked.
   * @param int    $box_size The maximum number of chars the string may have.
   *
   * @return bool true if the length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside(string $str, int $box_size): bool
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1734
1735
  /**
   * Applies the case-folding replacement tables to a string.
   *
   * The "upper" / "lower" lists of self::$COMMON_CASE_FOLD are always applied; the lists loaded
   * from the "caseFolding_full" data file are applied in addition when $fullCaseFold is truthy.
   *
   * @param string $str
   * @param bool   $useLower     <p>Only strictly <b>true</b> replaces towards the "lower" list;
   *                             any other value (the default) replaces towards the "upper" list.</p>
   * @param bool   $fullCaseFold <p>Also apply the "caseFolding_full" data, not only the common cases.</p>
   *
   * @return string
   */
  private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
  {
    // lazily loaded once per request and then reused
    static $FULL_CASE_FOLD = null;

    $towardsLower = ($useLower === true);

    $commonUpper = self::$COMMON_CASE_FOLD['upper'];
    $commonLower = self::$COMMON_CASE_FOLD['lower'];

    $str = $towardsLower
        ? (string)\str_replace($commonUpper, $commonLower, $str)
        : (string)\str_replace($commonLower, $commonUpper, $str);

    if (!$fullCaseFold) {
      return $str;
    }

    if ($FULL_CASE_FOLD === null) {
      $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    // index 0 is the search list and index 1 the replacement list when going
    // towards lower case; the roles are swapped for the other direction
    if ($towardsLower) {
      return (string)\str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
    }

    return (string)\str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
  }
1777
1778
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The replacement map is read from the "utf8_fix" data file on first use and its keys / values
   * are cached in static variables. If the map cannot be loaded, the input is returned unchanged.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      if (self::$BROKEN_UTF8_FIX === null) {
        // getData() returns false when the data file is missing; never store
        // that in a property that is declared as array|null
        $fixMap = self::getData('utf8_fix');
        if (\is_array($fixMap) === true) {
          self::$BROKEN_UTF8_FIX = $fixMap;
        }
      }

      if (\is_array(self::$BROKEN_UTF8_FIX) === false) {
        // no replacement map available -> nothing we can fix
        return $str;
      }

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys(self::$BROKEN_UTF8_FIX);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1812
1813
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * A string is run through "UTF8::utf8_decode()" + "UTF8::to_utf8()" again and again until
   * the result no longer changes; arrays are processed element by element (recursively).
   *
   * @param string[]|string $str You can use a string or an array of strings.
   *
   * @return string[]|string
   *                          Will return the fixed input-"array" or
   *                          the fixed input-"string".
   */
  public static function fix_utf8($str)
  {
    if (\is_array($str) === true) {
      $fixed = [];
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    $current = (string)$str;
    $previous = '';

    // repeat until a round trip leaves the string untouched
    while ($previous !== $current) {
      $previous = $current;
      $current = self::to_utf8(self::utf8_decode($previous, true));
    }

    return $current;
  }
1843
1844
  /**
   * Get the writing direction of a specific character.
   *
   * If "intlChar" support is flagged in self::$SUPPORT, the result of "IntlChar::charDirection()"
   * is used whenever it is one of the listed direction codes. In every other case the code point
   * from "chr_to_decimal()" is compared with a hard-coded list of right-to-left code points.
   *
   * @param string $char
   *
   * @return string 'RTL' or 'LTR'
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes taken from the "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }

      // any other code falls through to the manual lookup below
    }

    $c = static::chr_to_decimal($char);

    // everything outside of this window is treated as left-to-right
    $insideWindow = (0x5be <= $c && 0x10b7f >= $c);
    if (!$insideWindow) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      $rtlSingles = [
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      ];
      $rtlRanges = [
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          [0x800, 0x815],
          [0x830, 0x83e],
          [0x840, 0x858],
      ];

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $rtlSingles = [
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      ];
      $rtlRanges = [
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // between the two tables (and not 0x200f)
      return 'LTR';

    }

    // single code points are compared strictly, ranges inclusively
    if (\in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1966
1967
  /**
   * Loads a data file from "/data/*.php" (relative to this file) via "require".
   *
   * @param string $file <p>File name without directory and without the ".php" extension.</p>
   *
   * @return mixed|false Whatever the required file returns, or false if the file does not exist.
   */
  private static function getData(string $file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1984
1985
  /**
   * Check for php-support.
   *
   * Runs "UTF8::checkForSupport()" first if the support-"array" has not been filled yet.
   *
   * @param string|null $key
   *
   * @return mixed
   *               Return the full support-"array", if $key === null<br>
   *               return the stored value, if $key is used and set<br>
   *               otherwise return <strong>null</strong>.
   */
  public static function getSupportInfo(string $key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown (or null) entries are reported as null
    return self::$SUPPORT[$key] ?? null;
  }
2011
2012
  /**
   * Guesses the file type from the first two bytes of the given content.
   *
   * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
   *          if you need more supported types, please use e.g. "finfo"
   *
   * @param string $str      <p>The (binary) content to inspect.</p>
   * @param array  $fallback <p>Returned as-is when the type is not recognized;
   *                         with this keys: 'ext', 'mime', 'type'</p>
   *
   * @return array
   *               with this keys: 'ext', 'mime', 'type'
   */
  public static function get_file_type(
      string $str,
      array $fallback = [
          'ext'  => null,
          'mime' => 'application/octet-stream',
          'type' => null,
      ]
  ): array
  {
    if ($str === '') {
      return $fallback;
    }

    $header = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($header) !== 2) {
      return $fallback;
    }

    // the decimal values of the two bytes are glued together, e.g. "PK" (80, 75) => 8075
    $bytes = \unpack('C2chars', $header);
    $signature = (int)($bytes['chars1'] . $bytes['chars2']);

    // signature => [extension, mime-type]; every known type is reported as "binary"
    $knownSignatures = [
        3780   => ['pdf', 'application/pdf'],
        7790   => ['exe', 'application/octet-stream'],
        7784   => ['midi', 'audio/x-midi'],
        8075   => ['zip', 'application/zip'],
        8297   => ['rar', 'application/rar'],
        255216 => ['jpg', 'image/jpeg'],
        7173   => ['gif', 'image/gif'],
        6677   => ['bmp', 'image/bmp'],
        13780  => ['png', 'image/png'],
    ];

    if (!isset($knownSignatures[$signature])) {
      return $fallback;
    }

    return [
        'ext'  => $knownSignatures[$signature][0],
        'mime' => $knownSignatures[$signature][1],
        'type' => 'binary',
    ];
  }
2111
2112
  /**
   * Builds a string of $length characters that are picked at random from $possibleChars.
   *
   * The position is chosen via "random_int()"; "mt_rand()" is only used if that call throws.
   *
   * @param int    $length        <p>Length of the random string.</p>
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string An empty string if $possibleChars has a length of 0.
   */
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $poolSize = self::strlen($possibleChars, $encoding);
    if ($poolSize === 0) {
      return '';
    }

    $result = '';

    for ($picked = 0; $picked < $length; $picked++) {
      try {
        $position = \random_int(0, $poolSize - 1);
      } catch (\Exception $e) {
        /** @noinspection RandomApiMigrationInspection */
        $position = \mt_rand(0, $poolSize - 1);
      }

      $result .= self::substr($possibleChars, $position, 1, $encoding);
    }

    return $result;
  }
2145
2146
  /**
   * Builds a unique identifier from a random number, the session-id, the remote / server
   * address (if present in $_SERVER), the extra entropy and "uniqid()".
   *
   * The random number comes from "random_int()", with "mt_rand()" as fallback if that call
   * throws, which is the same strategy as in "UTF8::get_random_string()".
   *
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
   *
   * @return string
   */
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    try {
      $randomPart = \random_int(0, \mt_getrandmax());
    } catch (\Exception $e) {
      /** @noinspection RandomApiMigrationInspection */
      $randomPart = \mt_rand();
    }

    $uniqueHelper = $randomPart .
                    \session_id() .
                    ($_SERVER['REMOTE_ADDR'] ?? '') .
                    ($_SERVER['SERVER_ADDR'] ?? '') .
                    $entropyExtra;

    $uniqueString = \uniqid($uniqueHelper, true);

    if ($md5) {
      $uniqueString = \md5($uniqueString . $uniqueHelper);
    }

    return $uniqueString;
  }
2168
2169
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see        UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool The unchanged result of "UTF8::string_has_bom()".
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom(string $str): bool
  {
    $containsBom = self::string_has_bom($str);

    return $containsBom;
  }
2184
2185
  /**
   * Returns true if the string contains a lower case char, false otherwise.
   *
   * The check is delegated to "UTF8::str_matches_pattern()" with the pattern ".*[[:lower:]]".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains a lower case character.
   */
  public static function has_lowercase(string $str): bool
  {
    $lowerCasePattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $lowerCasePattern);
  }
2196
2197
  /**
   * Returns true if the string contains an upper case char, false otherwise.
   *
   * The check is delegated to "UTF8::str_matches_pattern()" with the pattern ".*[[:upper:]]".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not the string contains an upper case character.
   */
  public static function has_uppercase(string $str): bool
  {
    $upperCasePattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $upperCasePattern);
  }
2208
2209
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with "hexdec()" and then handed to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false One single UTF-8 character.
   */
  public static function hex_to_chr(string $hexdec)
  {
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2220
2221
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted are 4 to 6 hexadecimal digits (case-insensitive), optionally prefixed
   * with "\u" or "U+". Anything else, including non-hex letters, is a failure.
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false The code point, or false on failure.
   */
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // only real hexadecimal digits are allowed: a wider class like [a-z] would
    // let e.g. "zzzz" through, which intval() then silently turns into 0
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
2245
2246
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * All arguments are handed over unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string The unchanged result of "UTF8::html_entity_decode()".
   */
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2261
2262
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * With "mbstring" support the work is done by "mb_encode_numericentity()", otherwise every
   * character is encoded on its own via "UTF8::single_chr_html_encode()".
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
      // with $keepAsciiChars the conversion only starts above the ASCII range
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // convmap entries are groups of four: [start, end, offset, mask];
      // a stray fifth element makes PHP >= 8.0 throw a ValueError
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            function ($chr) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2315
2316
  /**
2317
   * UTF-8 version of html_entity_decode()
2318
   *
2319
   * The reason we are not using html_entity_decode() by itself is because
2320
   * while it is not technically correct to leave out the semicolon
2321
   * at the end of an entity most browsers will still interpret the entity
2322
   * correctly. html_entity_decode() does not convert entities without
2323
   * semicolons, so we are left with our own little solution here. Bummer.
2324
   *
2325
   * Convert all HTML entities to their applicable characters
2326
   *
2327
   * INFO: opposite to UTF8::html_encode()
2328
   *
2329
   * @link http://php.net/manual/en/function.html-entity-decode.php
2330
   *
2331
   * @param string $str      <p>
2332
   *                         The input string.
2333
   *                         </p>
2334
   * @param int    $flags    [optional] <p>
2335
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2336
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2337
   *                         <table>
2338
   *                         Available <i>flags</i> constants
2339
   *                         <tr valign="top">
2340
   *                         <td>Constant Name</td>
2341
   *                         <td>Description</td>
2342
   *                         </tr>
2343
   *                         <tr valign="top">
2344
   *                         <td><b>ENT_COMPAT</b></td>
2345
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2346
   *                         </tr>
2347
   *                         <tr valign="top">
2348
   *                         <td><b>ENT_QUOTES</b></td>
2349
   *                         <td>Will convert both double and single quotes.</td>
2350
   *                         </tr>
2351
   *                         <tr valign="top">
2352
   *                         <td><b>ENT_NOQUOTES</b></td>
2353
   *                         <td>Will leave both double and single quotes unconverted.</td>
2354
   *                         </tr>
2355
   *                         <tr valign="top">
2356
   *                         <td><b>ENT_HTML401</b></td>
2357
   *                         <td>
2358
   *                         Handle code as HTML 4.01.
2359
   *                         </td>
2360
   *                         </tr>
2361
   *                         <tr valign="top">
2362
   *                         <td><b>ENT_XML1</b></td>
2363
   *                         <td>
2364
   *                         Handle code as XML 1.
2365
   *                         </td>
2366
   *                         </tr>
2367
   *                         <tr valign="top">
2368
   *                         <td><b>ENT_XHTML</b></td>
2369
   *                         <td>
2370
   *                         Handle code as XHTML.
2371
   *                         </td>
2372
   *                         </tr>
2373
   *                         <tr valign="top">
2374
   *                         <td><b>ENT_HTML5</b></td>
2375
   *                         <td>
2376
   *                         Handle code as HTML 5.
2377
   *                         </td>
2378
   *                         </tr>
2379
   *                         </table>
2380
   *                         </p>
2381
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2382
   *
2383
   * @return string The decoded string.
2384
   */
2385 40
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Inputs shorter than four bytes are returned untouched (examples: "&;" or "&x;").
    if (!isset($str[3])) {
      return $str;
    }

    // Cheap pre-check: without an "&", or without both "&#" and ";", there is nothing to decode.
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // NOTE: when no flags are passed, this method uses ENT_QUOTES | ENT_HTML5.
    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // The support table has to be populated *before* self::$SUPPORT['mbstring'] is read,
    // otherwise the first read hits an undefined index and the warning below can never fire.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Decode repeatedly until a pass leaves the string unchanged (handles nested / double-encoded entities).
    do {
      $str_compare = $str;

      # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
      if (self::$SUPPORT['mbstring'] === true) {

        $str = \mb_decode_numericentity(
            $str,
            [0x80, 0xfffff, 0, 0xfffff, 0],
            $encoding
        );

      } else {

        $str = (string)\preg_replace_callback(
            "/&#\d{2,6};/",
            function ($matches) use ($encoding) {
              // always fallback via symfony polyfill
              $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

              // Quote characters are left encoded here; the native call below handles them according to $flags.
              if ($returnTmp !== '"' && $returnTmp !== "'") {
                return $returnTmp;
              }

              return $matches[0];
            },
            $str
        );

      }

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to unterminated numeric entities first)
      $str = \html_entity_decode(
          \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2473
2474
  /**
2475
   * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2476
   *
2477
   * @param string $str
2478
   * @param string $encoding [optional] <p>Default: UTF-8</p>
2479
   *
2480
   * @return string
2481
   */
2482 6
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    // Both quote styles are converted (ENT_QUOTES) and ENT_SUBSTITUTE is set for invalid sequences.
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $escapeFlags, $encoding);
  }
2490
2491
  /**
2492
   * Remove empty html-tag.
2493
   *
2494
   * e.g.: <tag></tag>
2495
   *
2496
   * @param string $str
2497
   *
2498
   * @return string
2499
   */
2500 1
  public static function html_stripe_empty_tags(string $str): string
  {
    // Opening tag, optional whitespace only, then a closing tag -> the whole span is removed.
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    // preg_replace() yields null on a PCRE error; the cast turns that into ''.
    return (string)$stripped;
  }
2508
2509
  /**
2510
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2511
   *
2512
   * @link http://php.net/manual/en/function.htmlentities.php
2513
   *
2514
   * @param string $str           <p>
2515
   *                              The input string.
2516
   *                              </p>
2517
   * @param int    $flags         [optional] <p>
2518
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2519
   *                              invalid code unit sequences and the used document type. The default is
2520
   *                              ENT_COMPAT | ENT_HTML401.
2521
   *                              <table>
2522
   *                              Available <i>flags</i> constants
2523
   *                              <tr valign="top">
2524
   *                              <td>Constant Name</td>
2525
   *                              <td>Description</td>
2526
   *                              </tr>
2527
   *                              <tr valign="top">
2528
   *                              <td><b>ENT_COMPAT</b></td>
2529
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2530
   *                              </tr>
2531
   *                              <tr valign="top">
2532
   *                              <td><b>ENT_QUOTES</b></td>
2533
   *                              <td>Will convert both double and single quotes.</td>
2534
   *                              </tr>
2535
   *                              <tr valign="top">
2536
   *                              <td><b>ENT_NOQUOTES</b></td>
2537
   *                              <td>Will leave both double and single quotes unconverted.</td>
2538
   *                              </tr>
2539
   *                              <tr valign="top">
2540
   *                              <td><b>ENT_IGNORE</b></td>
2541
   *                              <td>
2542
   *                              Silently discard invalid code unit sequences instead of returning
2543
   *                              an empty string. Using this flag is discouraged as it
2544
   *                              may have security implications.
2545
   *                              </td>
2546
   *                              </tr>
2547
   *                              <tr valign="top">
2548
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2549
   *                              <td>
2550
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2551
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2552
   *                              </td>
2553
   *                              </tr>
2554
   *                              <tr valign="top">
2555
   *                              <td><b>ENT_DISALLOWED</b></td>
2556
   *                              <td>
2557
   *                              Replace invalid code points for the given document type with a
2558
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2559
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2560
   *                              instance, to ensure the well-formedness of XML documents with
2561
   *                              embedded external content.
2562
   *                              </td>
2563
   *                              </tr>
2564
   *                              <tr valign="top">
2565
   *                              <td><b>ENT_HTML401</b></td>
2566
   *                              <td>
2567
   *                              Handle code as HTML 4.01.
2568
   *                              </td>
2569
   *                              </tr>
2570
   *                              <tr valign="top">
2571
   *                              <td><b>ENT_XML1</b></td>
2572
   *                              <td>
2573
   *                              Handle code as XML 1.
2574
   *                              </td>
2575
   *                              </tr>
2576
   *                              <tr valign="top">
2577
   *                              <td><b>ENT_XHTML</b></td>
2578
   *                              <td>
2579
   *                              Handle code as XHTML.
2580
   *                              </td>
2581
   *                              </tr>
2582
   *                              <tr valign="top">
2583
   *                              <td><b>ENT_HTML5</b></td>
2584
   *                              <td>
2585
   *                              Handle code as HTML 5.
2586
   *                              </td>
2587
   *                              </tr>
2588
   *                              </table>
2589
   *                              </p>
2590
   * @param string $encoding      [optional] <p>
2591
   *                              Like <b>htmlspecialchars</b>,
2592
   *                              <b>htmlentities</b> takes an optional third argument
2593
   *                              <i>encoding</i> which defines encoding used in
2594
   *                              conversion.
2595
   *                              Although this argument is technically optional, you are highly
2596
   *                              encouraged to specify the correct value for your code.
2597
   *                              </p>
2598
   * @param bool   $double_encode [optional] <p>
2599
   *                              When <i>double_encode</i> is turned off PHP will not
2600
   *                              encode existing html entities. The default is to convert everything.
2601
   *                              </p>
2602
   *
2603
   *
2604
   * @return string The encoded string.
2605
   * </p>
2606
   * <p>
2607
   * If the input <i>string</i> contains an invalid code unit
2608
   * sequence within the given <i>encoding</i> an empty string
2609
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2610
   * <b>ENT_SUBSTITUTE</b> flags are set.
2611
   */
2612 9
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // 'UTF-8' and 'CP850' are used as given; every other encoding name is normalized first.
    $needsNormalization = ($encoding !== 'UTF-8' && $encoding !== 'CP850');
    if ($needsNormalization) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The native htmlentities() leaves backslashes alone, so they are turned into
    // their numeric entity right after the native conversion.
    // (see https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303)
    $encoded = \str_replace(
        '\\',
        '&#92;',
        \htmlentities($str, $flags, $encoding, $double_encode)
    );

    return self::html_encode($encoded, true, $encoding);
  }
2632
2633
  /**
2634
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2635
   *
2636
   * INFO: Take a look at "UTF8::htmlentities()"
2637
   *
2638
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2639
   *
2640
   * @param string $str           <p>
2641
   *                              The string being converted.
2642
   *                              </p>
2643
   * @param int    $flags         [optional] <p>
2644
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2645
   *                              invalid code unit sequences and the used document type. The default is
2646
   *                              ENT_COMPAT | ENT_HTML401.
2647
   *                              <table>
2648
   *                              Available <i>flags</i> constants
2649
   *                              <tr valign="top">
2650
   *                              <td>Constant Name</td>
2651
   *                              <td>Description</td>
2652
   *                              </tr>
2653
   *                              <tr valign="top">
2654
   *                              <td><b>ENT_COMPAT</b></td>
2655
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2656
   *                              </tr>
2657
   *                              <tr valign="top">
2658
   *                              <td><b>ENT_QUOTES</b></td>
2659
   *                              <td>Will convert both double and single quotes.</td>
2660
   *                              </tr>
2661
   *                              <tr valign="top">
2662
   *                              <td><b>ENT_NOQUOTES</b></td>
2663
   *                              <td>Will leave both double and single quotes unconverted.</td>
2664
   *                              </tr>
2665
   *                              <tr valign="top">
2666
   *                              <td><b>ENT_IGNORE</b></td>
2667
   *                              <td>
2668
   *                              Silently discard invalid code unit sequences instead of returning
2669
   *                              an empty string. Using this flag is discouraged as it
2670
   *                              may have security implications.
2671
   *                              </td>
2672
   *                              </tr>
2673
   *                              <tr valign="top">
2674
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2675
   *                              <td>
2676
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2677
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2678
   *                              </td>
2679
   *                              </tr>
2680
   *                              <tr valign="top">
2681
   *                              <td><b>ENT_DISALLOWED</b></td>
2682
   *                              <td>
2683
   *                              Replace invalid code points for the given document type with a
2684
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2685
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2686
   *                              instance, to ensure the well-formedness of XML documents with
2687
   *                              embedded external content.
2688
   *                              </td>
2689
   *                              </tr>
2690
   *                              <tr valign="top">
2691
   *                              <td><b>ENT_HTML401</b></td>
2692
   *                              <td>
2693
   *                              Handle code as HTML 4.01.
2694
   *                              </td>
2695
   *                              </tr>
2696
   *                              <tr valign="top">
2697
   *                              <td><b>ENT_XML1</b></td>
2698
   *                              <td>
2699
   *                              Handle code as XML 1.
2700
   *                              </td>
2701
   *                              </tr>
2702
   *                              <tr valign="top">
2703
   *                              <td><b>ENT_XHTML</b></td>
2704
   *                              <td>
2705
   *                              Handle code as XHTML.
2706
   *                              </td>
2707
   *                              </tr>
2708
   *                              <tr valign="top">
2709
   *                              <td><b>ENT_HTML5</b></td>
2710
   *                              <td>
2711
   *                              Handle code as HTML 5.
2712
   *                              </td>
2713
   *                              </tr>
2714
   *                              </table>
2715
   *                              </p>
2716
   * @param string $encoding      [optional] <p>
2717
   *                              Defines encoding used in conversion.
2718
   *                              </p>
2719
   *                              <p>
2720
   *                              For the purposes of this function, the encodings
2721
   *                              ISO-8859-1, ISO-8859-15,
2722
   *                              UTF-8, cp866,
2723
   *                              cp1251, cp1252, and
2724
   *                              KOI8-R are effectively equivalent, provided the
2725
   *                              <i>string</i> itself is valid for the encoding, as
2726
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2727
   *                              the same positions in all of these encodings.
2728
   *                              </p>
2729
   * @param bool   $double_encode [optional] <p>
2730
   *                              When <i>double_encode</i> is turned off PHP will not
2731
   *                              encode existing html entities, the default is to convert everything.
2732
   *                              </p>
2733
   *
2734
   * @return string The converted string.
2735
   * </p>
2736
   * <p>
2737
   * If the input <i>string</i> contains an invalid code unit
2738
   * sequence within the given <i>encoding</i> an empty string
2739
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2740
   * <b>ENT_SUBSTITUTE</b> flags are set.
2741
   */
2742 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // 'UTF-8' and 'CP850' are used as given; every other encoding name is normalized first.
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
  }
2750
2751
  /**
2752
   * Checks whether iconv is available on the server.
2753
   *
2754
   * @return bool
2755
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2756
   */
2757
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already returns a bool, so it can be returned directly.
    return \extension_loaded('iconv');
  }
2761
2762
  /**
2763
   * alias for "UTF8::decimal_to_chr()"
2764
   *
2765
   * @see UTF8::decimal_to_chr()
2766
   *
2767
   * @param mixed $int
2768
   *
2769
   * @return string
2770
   */
2771 4
  public static function int_to_chr($int): string
  {
    // Pure alias: the conversion itself lives in decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2775
2776
  /**
2777
   * Converts Integer to hexadecimal U+xxxx code point representation.
2778
   *
2779
   * INFO: opposite to UTF8::hex_to_int()
2780
   *
2781
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2782
   * @param string $pfix [optional]
2783
   *
2784
   * @return string The code point, or empty string on failure.
2785
   */
2786 6
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // dechex() produces lowercase hex digits without any padding.
    $hexDigits = \dechex($int);

    // Left-pad with zeros up to a minimum of four digits; longer values are kept as they are.
    while (\strlen($hexDigits) < 4) {
      $hexDigits = '0' . $hexDigits;
    }

    return $pfix . $hexDigits;
  }
2794
2795
  /**
2796
   * Checks whether intl-char is available on the server.
2797
   *
2798
   * @return bool
2799
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2800
   */
2801
  public static function intlChar_loaded(): bool
  {
    // Availability is detected via the presence of the "IntlChar" class.
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2805
2806
  /**
2807
   * Checks whether intl is available on the server.
2808
   *
2809
   * @return bool
2810
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2811
   */
2812 5
  public static function intl_loaded(): bool
  {
    // Availability is detected via the loaded "intl" extension.
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2816
2817
  /**
2818
   * alias for "UTF8::is_ascii()"
2819
   *
2820
   * @see        UTF8::is_ascii()
2821
   *
2822
   * @param string $str
2823
   *
2824
   * @return bool
2825
   *
2826
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2827
   */
2828 2
  public static function isAscii(string $str): bool
  {
    // Deprecated alias: forwards unchanged to is_ascii().
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2832
2833
  /**
2834
   * alias for "UTF8::is_base64()"
2835
   *
2836
   * @see        UTF8::is_base64()
2837
   *
2838
   * @param string $str
2839
   *
2840
   * @return bool
2841
   *
2842
   * @deprecated <p>use "UTF8::is_base64()"</p>
2843
   */
2844 2
  public static function isBase64($str): bool
  {
    // Deprecated alias: forwards unchanged to is_base64().
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2848
2849
  /**
2850
   * alias for "UTF8::is_binary()"
2851
   *
2852
   * @see        UTF8::is_binary()
2853
   *
2854
   * @param mixed $str
2855
   * @param bool  $strict
2856
   *
2857
   * @return bool
2858
   *
2859
   * @deprecated <p>use "UTF8::is_binary()"</p>
2860
   */
2861 4
  public static function isBinary($str, $strict = false): bool
  {
    // Deprecated alias: forwards both arguments unchanged to is_binary().
    $isBinary = self::is_binary($str, $strict);

    return $isBinary;
  }
2865
2866
  /**
2867
   * alias for "UTF8::is_bom()"
2868
   *
2869
   * @see        UTF8::is_bom()
2870
   *
2871
   * @param string $utf8_chr
2872
   *
2873
   * @return bool
2874
   *
2875
   * @deprecated <p>use "UTF8::is_bom()"</p>
2876
   */
2877 2
  public static function isBom(string $utf8_chr): bool
  {
    // Deprecated alias: forwards unchanged to is_bom().
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2881
2882
  /**
2883
   * alias for "UTF8::is_html()"
2884
   *
2885
   * @see        UTF8::is_html()
2886
   *
2887
   * @param string $str
2888
   *
2889
   * @return bool
2890
   *
2891
   * @deprecated <p>use "UTF8::is_html()"</p>
2892
   */
2893 2
  public static function isHtml(string $str): bool
  {
    // Deprecated alias: forwards unchanged to is_html().
    $isHtml = self::is_html($str);

    return $isHtml;
  }
2897
2898
  /**
2899
   * alias for "UTF8::is_json()"
2900
   *
2901
   * @see        UTF8::is_json()
2902
   *
2903
   * @param string $str
2904
   *
2905
   * @return bool
2906
   *
2907
   * @deprecated <p>use "UTF8::is_json()"</p>
2908
   */
2909
  public static function isJson(string $str): bool
  {
    // Deprecated alias: forwards unchanged to is_json().
    $isJson = self::is_json($str);

    return $isJson;
  }
2913
2914
  /**
2915
   * alias for "UTF8::is_utf16()"
2916
   *
2917
   * @see        UTF8::is_utf16()
2918
   *
2919
   * @param mixed $str
2920
   *
2921
   * @return int|false
2922
   *                    <strong>false</strong> if it's not UTF-16,<br>
2923
   *                    <strong>1</strong> for UTF-16LE,<br>
2924
   *                    <strong>2</strong> for UTF-16BE.
2925
   *
2926
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2927
   */
2928 2
  public static function isUtf16($str)
  {
    // Deprecated alias: the result of is_utf16() is passed through as-is.
    $utf16Result = self::is_utf16($str);

    return $utf16Result;
  }
2932
2933
  /**
2934
   * alias for "UTF8::is_utf32()"
2935
   *
2936
   * @see        UTF8::is_utf32()
2937
   *
2938
   * @param mixed $str
2939
   *
2940
   * @return int|false
2941
   *                   <strong>false</strong> if it's not UTF-32,
2942
   *                   <strong>1</strong> for UTF-32LE,
2943
   *                   <strong>2</strong> for UTF-32BE.
2944
   *
2945
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2946
   */
2947 2
  public static function isUtf32($str)
  {
    // Deprecated alias: the result of is_utf32() is passed through as-is.
    $utf32Result = self::is_utf32($str);

    return $utf32Result;
  }
2951
2952
  /**
2953
   * alias for "UTF8::is_utf8()"
2954
   *
2955
   * @see        UTF8::is_utf8()
2956
   *
2957
   * @param string $str
2958
   * @param bool   $strict
2959
   *
2960
   * @return bool
2961
   *
2962
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2963
   */
2964 17
  public static function isUtf8($str, $strict = false): bool
  {
    // Deprecated alias: forwards both arguments unchanged to is_utf8().
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
2968
2969
  /**
2970
   * Returns true if the string contains only alphabetic chars, false otherwise.
2971
   *
2972
   * @param string $str
2973
   *
2974
   * @return bool
2975
   *               Whether or not $str contains only alphabetic chars.
2976
   */
2977 10
  public static function is_alpha(string $str): bool
  {
    // Anchored pattern: the whole string must consist of [[:alpha:]] characters.
    $alphaOnlyPattern = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $alphaOnlyPattern);
  }
2981
2982
  /**
2983
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2984
   *
2985
   * @param string $str
2986
   *
2987
   * @return bool
2988
   *               Whether or not $str contains only alphanumeric chars.
2989
   */
2990 13
  public static function is_alphanumeric(string $str): bool
  {
    // Anchored pattern: the whole string must consist of [[:alnum:]] characters.
    $alnumOnlyPattern = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $alnumOnlyPattern);
  }
2994
2995
  /**
2996
   * Checks if a string is 7 bit ASCII.
2997
   *
2998
   * @param string $str <p>The string to check.</p>
2999
   *
3000
   * @return bool
3001
   *              <strong>true</strong> if it is ASCII<br>
3002
   *              <strong>false</strong> otherwise
3003
   *
3004
   */
3005 202
  public static function is_ascii(string $str): bool
  {
    // The empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // Accepted bytes: \x09, \x10, \x13, \x0A, \x0D and the printable range \x20-\x7E.
    // NOTE(review): \x10 and \x13 may have been meant as decimal 10 / 13 (LF / CR are
    // already listed as \x0A / \x0D) -- confirm before touching the pattern.
    $foundOtherByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$foundOtherByte;
  }
3013
3014
  /**
3015
   * Returns true if the string is base64 encoded, false otherwise.
3016
   *
3017
   * @param string $str <p>The input string.</p>
3018
   *
3019
   * @return bool Whether or not $str is base64 encoded.
3020
   */
3021 9
  public static function is_base64($str): bool
  {
    // Non-string input and the empty string are never treated as base64.
    if (\is_string($str) === false || $str === '') {
      return false;
    }

    // Strict mode: base64_decode() returns false when the input contains
    // characters from outside the base64 alphabet.
    $decoded = \base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // Compare against false explicitly (above) instead of relying on truthiness:
    // a payload that decodes to the string "0" (e.g. 'MA==') is falsy in PHP but
    // is still perfectly valid base64.
    // Re-encoding must reproduce the input exactly, which rejects non-canonical forms.
    return \base64_encode($decoded) === $str;
  }
3035
3036
  /**
3037
   * Check if the input is binary... (this looks like a hack).
3038
   *
3039
   * @param mixed $input
3040
   * @param bool  $strict
3041
   *
3042
   * @return bool
3043
   */
3044 39
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;
    if ($input === '') {
      return false;
    }

    // A string made up solely of the characters "0" and "1" counts as binary.
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    $fileType = self::get_file_type($input);
    if ($fileType['type'] === 'binary') {
      return true;
    }

    $byteLength = self::strlen_in_byte($input);
    if ($byteLength) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // Heuristic: a NUL-byte share above 0.256 marks the input as binary.
      $nullByteCount = self::substr_count_in_byte($input, "\x0", 0, $byteLength);
      if (($nullByteCount / $byteLength) > 0.256) {
        return true;
      }
    }

    // Everything below only applies to strict mode.
    if ($strict !== true) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['finfo'] === false) {
      throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    // Ask fileinfo for the MIME encoding of the buffer; "binary" is the only positive answer.
    /** @noinspection PhpComposerExtensionStubsInspection */
    $finfo = new \finfo(FILEINFO_MIME_ENCODING);
    $detectedEncoding = $finfo->buffer($input);

    return $detectedEncoding === 'binary';
  }
3093
3094
  /**
3095
   * Check if the file is binary.
3096
   *
3097
   * @param string $file
3098
   *
3099
   * @return bool
3100
   */
3101 6
  public static function is_binary_file($file): bool
  {
    $handle = \fopen($file, 'rb');
    if (!\is_resource($handle)) {
      // The file could not be opened, so there is nothing to inspect.
      return false;
    }

    // Only the first 512 bytes are sampled for the binary check.
    $sample = \fread($handle, 512);
    \fclose($handle);

    if ($sample === '') {
      return false;
    }

    // The sample is checked in strict mode.
    return self::is_binary($sample, true);
  }
3118
3119
  /**
3120
   * Returns true if the string contains only whitespace chars, false otherwise.
3121
   *
3122
   * @param string $str
3123
   *
3124
   * @return bool
3125
   *               Whether or not $str contains only whitespace characters.
3126
   */
3127 15
  public static function is_blank(string $str): bool
  {
    // Anchored pattern: the whole string must consist of [[:space:]] characters.
    $whitespaceOnlyPattern = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $whitespaceOnlyPattern);
  }
3131
3132
  /**
3133
   * Checks if the given string is equal to any "Byte Order Mark".
3134
   *
3135
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3136
   *
3137
   * @param string $str <p>The input string.</p>
3138
   *
3139
   * @return bool
3140
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
3141
   */
3142 2
  public static function is_bom($str): bool
  {
    // self::$BOM is keyed by the BOM byte sequence; only the keys matter here.
    foreach (self::$BOM as $bomSequence => $unusedLength) {
      if ($str !== $bomSequence) {
        continue;
      }

      return true;
    }

    return false;
  }
3152
3153
  /**
3154
   * Determine whether the string is considered to be empty.
3155
   *
3156
   * A variable is considered empty if it does not exist or if its value equals FALSE.
3157
   * empty() does not generate a warning if the variable does not exist.
3158
   *
3159
   * @param mixed $str
3160
   *
3161
   * @return bool Whether or not $str is empty().
3162
   */
3163
  public static function is_empty($str): bool
  {
    // For a parameter (which is always set), empty() is the same as a loose boolean check.
    return !$str;
  }
3167
3168
  /**
3169
   * Returns true if the string contains only hexadecimal chars, false otherwise.
3170
   *
3171
   * @param string $str
3172
   *
3173
   * @return bool
3174
   *               Whether or not $str contains only hexadecimal chars.
3175
   */
3176 13
  public static function is_hexadecimal(string $str): bool
  {
    // Anchored pattern: the whole string must consist of [[:xdigit:]] characters.
    $hexOnlyPattern = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $hexOnlyPattern);
  }
3180
3181
  /**
3182
   * Check if the string contains any html-tags <lall>.
3183
   *
3184
   * @param string $str <p>The input string.</p>
3185
   *
3186
   * @return bool
3187
   */
3188 3
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // Looks for one opening, closing or self-closing tag, with optional attributes.
    $tagMatch = [];
    \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $tagMatch);

    // Any captured match means at least one tag was found.
    return \count($tagMatch) !== 0;
  }
3201
3202
  /**
3203
   * Try to check if "$str" is a JSON string.
3204
   *
3205
   * @param string $str <p>The input string.</p>
3206
   *
3207
   * @return bool
3208
   */
3209 22
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // Only values that decode to an object or an array are accepted.
    if (\is_object($decoded) !== true && \is_array($decoded) !== true) {
      return false;
    }

    // ... and the decoder must not have reported an error.
    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3234
3235
  /**
   * Checks whether the string consists of lower case chars only.
   *
   * The decision is delegated to UTF8::str_matches_pattern() using the POSIX
   * class "[[:lower:]]".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if $str matches the lower case pattern, <strong>false</strong> otherwise.
   */
  public static function is_lowercase(string $str): bool
  {
    return (bool)self::str_matches_pattern($str, '^[[:lower:]]*$');
  }
3248
3249
  /**
   * Returns true if the string is serialized, false otherwise.
   *
   * The check is done by actually unserializing the string. To keep this safe
   * for untrusted input, no classes are instantiated while doing so
   * ("allowed_classes" => false): serialized objects are still recognized, but
   * no constructors, wakeup- or destructor-hooks of real classes can be triggered.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool Whether or not $str is serialized.
   */
  public static function is_serialized(string $str): bool
  {
    if ('' === $str) {
      return false;
    }

    // A serialized "false" would be indistinguishable from the error result
    // of unserialize(), so it is handled explicitly.
    if ($str === 'b:0;') {
      return true;
    }

    // unserialize() emits a notice for invalid input, hence the silence operator.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3268
3269
  /**
   * Checks whether the string consists of upper case chars only.
   *
   * The decision is delegated to UTF8::str_matches_pattern() using the POSIX
   * class "[[:upper:]]".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               <strong>true</strong> if $str matches the upper case pattern, <strong>false</strong> otherwise.
   */
  public static function is_uppercase(string $str): bool
  {
    $onlyUpperCase = self::str_matches_pattern($str, '^[[:upper:]]*$');

    return $onlyUpperCase;
  }
3282
3283
  /**
   * Check if the string is UTF-16.
   *
   * Heuristic: the input is converted from each UTF-16 byte order to UTF-8 and
   * back again. For every byte order that survives this round trip, the chars
   * of the converted string are compared against the char statistics of the
   * input; the byte order with more hits wins. A tie (including "no hits at
   * all") means "not UTF-16".
   *
   * @param mixed $str                   <p>The input string.</p>
   * @param bool  $checkIfStringIsBinary [optional] <p>If true, the string must be detected as binary by
   *                                     UTF8::is_binary() first, otherwise false is returned right away.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   */
  public static function is_utf16($str, $checkIfStringIsBinary = true)
  {
    // init
    $str = (string)$str;

    if (
        $checkIfStringIsBinary === true
        &&
        self::is_binary($str, true) === false
    ) {
      return false;
    }

    // The support-info is collected lazily; make sure it exists before it is
    // read, this method may be the first entry point that needs it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // char statistics of the input, calculated at most once and only if needed
    $strChars = [];

    // number of hits per byte order, LE is tested first
    $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // UTF-8 -> candidate -> UTF-8 must reproduce the first conversion
      $roundTrip = \mb_convert_encoding(
          \mb_convert_encoding($asUtf8, $candidate, 'UTF-8'),
          'UTF-8',
          $candidate
      );
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $hits[$candidate]++;
        }
      }
    }

    if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
      return false;
    }

    return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
  }
3358
3359
  /**
   * Check if the string is UTF-32.
   *
   * Heuristic: the input is converted from each UTF-32 byte order to UTF-8 and
   * back again. For every byte order that survives this round trip, the chars
   * of the converted string are compared against the char statistics of the
   * input; the byte order with more hits wins. A tie (including "no hits at
   * all") means "not UTF-32".
   *
   * @param mixed $str                   <p>The input string.</p>
   * @param bool  $checkIfStringIsBinary [optional] <p>If true, the string must be detected as binary by
   *                                     UTF8::is_binary() first, otherwise false is returned right away.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   */
  public static function is_utf32($str, $checkIfStringIsBinary = true)
  {
    // init
    $str = (string)$str;

    if (
        $checkIfStringIsBinary === true
        &&
        self::is_binary($str, true) === false
    ) {
      return false;
    }

    // The support-info is collected lazily; make sure it exists before it is
    // read, this method may be the first entry point that needs it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // char statistics of the input, calculated at most once and only if needed
    $strChars = [];

    // number of hits per byte order, LE is tested first
    $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // UTF-8 -> candidate -> UTF-8 must reproduce the first conversion
      $roundTrip = \mb_convert_encoding(
          \mb_convert_encoding($asUtf8, $candidate, 'UTF-8'),
          'UTF-8',
          $candidate
      );
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($strChars) === 0) {
        $strChars = self::count_chars($str, true);
      }

      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $strChars, true) === true) {
          $hits[$candidate]++;
        }
      }
    }

    if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
      return false;
    }

    return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
  }
3434
3435
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * Arrays are checked element by element (recursively); the first invalid
   * element makes the whole result false. An empty string is valid UTF-8.
   *
   * If PCRE was compiled with UTF-8 support, the validation is delegated to
   * the regex engine (fast path). Otherwise a byte-wise state machine is used
   * that rejects non-shortest forms, 5- and 6-octet sequences, surrogates and
   * code points above U+10FFFF.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string|string[] $str    <p>The string to be checked.</p>
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, bool $strict = false): bool
  {
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    // Work on a real string from here on: the byte loop below indexes into
    // $str, which is only defined for strings.
    $str = (string)$str;

    if ('' === $str) {
      return true;
    }

    if ($strict === true) {
      $isBinary = self::is_binary($str, true);

      if ($isBinary && self::is_utf16($str, false) !== false) {
        return false;
      }

      if ($isBinary && self::is_utf32($str, false) !== false) {
        return false;
      }
    }

    if (self::pcre_utf8_support() === true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      //
      // This only works if PCRE supports UTF-8; without that support the
      // match fails for every input, so the state machine below is used instead.
      return (\preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$ORD === null) {
      // getData() may fail and return false; only a real lookup table is cached.
      $ordData = self::getData('ord');
      if (\is_array($ordData) === true) {
        self::$ORD = $ordData;
      }
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      // byte value of the current octet, native ord() is the fallback if the
      // lookup table is not available
      $in = self::$ORD[$str[$i]] ?? \ord($str[$i]);
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Current octet is neither in the US-ASCII range nor a legal first
          // octet of a multi-octet sequence.
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $tmp = $in;
          $tmp = ($tmp & 0x0000003F) << $shift;
          $mUcs4 |= $tmp;
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
          // Unicode code point to be output.
          if (0 === --$mState) {
            // Check for illegal sequences and code points.
            //
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080)
                ||
                (3 === $mBytes && $mUcs4 < 0x0800)
                ||
                (4 === $mBytes && $mUcs4 < 0x10000)
                ||
                (4 < $mBytes)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }
            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
          // Incomplete multi-octet sequence.
          return false;
        }
      }
    }

    return true;
  }
3594
3595
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Decodes a JSON string.
   *
   * The input is passed through UTF8::filter() before it is handed over to
   * the native json_decode() function.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded; the native decoder only works with
   *                        UTF-8 encoded strings.
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, e.g. <b>JSON_BIGINT_AS_STRING</b>
   *                        (default is to cast large integers as floats).
   *                        </p>
   *
   * @throws \RuntimeException if the json extension is not available
   *
   * @return mixed
   *                The value encoded in <i>json</i> in appropriate PHP type.
   *                <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
   *                is deeper than the recursion limit.
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    $filtered = self::filter($json);

    // the support-info is collected lazily on first use
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filtered, $assoc, $depth, $options);
  }
3647
3648
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value.
   *
   * The value is passed through UTF8::filter() before it is handed over to
   * the native json_encode() function.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
   *                       constants is described on
   *                       the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       </p>
   *
   * @throws \RuntimeException if the json extension is not available
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    $filtered = self::filter($value);

    // the support-info is collected lazily on first use
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filtered, $options, $depth);
  }
3703
3704
  /**
   * Checks whether JSON is available on the server.
   *
   * The check looks for the native "json_decode" function.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3714
3715
  /**
   * Makes string's first char lowercase.
   *
   * The string is split into its first char and the remainder; only the first
   * char is passed to UTF8::strtolower(), the remainder is appended unchanged.
   *
   * @param string      $str                   <p>The input string</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string The resulting string.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // everything after the first char, substr() may report a failure via false
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $remainder = $remainder === false ? '' : $remainder;

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    return self::strtolower($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength) . $remainder;
  }
3743
3744
  /**
   * alias for "UTF8::lcfirst()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string      $str                   <p>The input string</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases.</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length</p>
   *
   * @return string
   */
  public static function lcword(
      string $str,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false,
      string $lang = null,
      bool $tryToKeepStringLength = false
  ): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $result;
  }
3767
3768
  /**
   * Lowercase for all words in the string.
   *
   * The string is split via UTF8::str_to_words(); the first char of every
   * non-empty part is lowercased via UTF8::lcfirst(), unless the part is listed
   * in $exceptions (strict comparison). The parts are concatenated again afterwards.
   *
   * @param string      $str                   <p>The input string.</p>
   * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
   * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start a
   *                                           new word.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string
   */
  public static function lcwords(
      string $str,
      array $exceptions = [],
      string $charlist = '',
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false,
      string $lang = null,
      bool $tryToKeepStringLength = false
  ): string
  {
    // Compare against the empty string explicitly: a loose check would also
    // treat the valid input "0" as empty.
    if ('' === $str) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only, a "0" is a regular word and must be kept
      if ('' === (string)$word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3828
3829
  /**
   * alias for "UTF8::lcfirst()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string      $str                   <p>The input string</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases.</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length</p>
   *
   * @return string
   */
  public static function lowerCaseFirst(
      string $str,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false,
      string $lang = null,
      bool $tryToKeepStringLength = false
  ): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $result;
  }
3852
3853
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without $chars (or with an empty value) all leading chars of the unicode
   * categories "Z" (separators) and "C" (control / other) are removed.
   * Otherwise every leading char that is contained in $chars is removed; the
   * chars are quoted, so they have no special meaning in the regex.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" (and 0) are falsy in PHP, but they are valid chars to strip, so
    // they must not be mistaken for "no chars given".
    $useDefaultChars = $chars === INF
                       ||
                       (!$chars && $chars !== '0' && $chars !== 0);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = "^[\pZ\pC]+";
    } else {
      $chars = \preg_quote((string)$chars, '/');
      $pattern = "^[$chars]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3877
3878
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array is concatenated to one string first; afterwards the highest code
   * point of that string is converted back into a character.
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character with the highest code point than others, returns null on failure or empty input.
   */
  public static function max($arg)
  {
    $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($haystack, false);

    // nothing to compare
    if (\count($codepoints) === 0) {
      return null;
    }

    return self::chr(\max($codepoints));
  }
3900
3901
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * The byte sizes are provided by UTF8::chr_size_list(); if that list is
   * empty, 0 is returned.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int Max byte lengths of the given chars.
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int)\max($sizes) : 0;
  }
3918
3919
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the internal encoding of
   * mbstring is set to "UTF-8".
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    if (!\extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3935
3936
  /**
   * Checks whether mbstring "overloaded" is active on the server.
   *
   * The result is true only if the constant "MB_OVERLOAD_STRING" exists and
   * its bit is set in the INI directive "mbstring.func_overload".
   *
   * @return bool
   */
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // without the constant there is nothing that could be overloaded
    if (!\defined('MB_OVERLOAD_STRING')) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)(@\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3953
3954
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array is concatenated to one string first; afterwards the lowest code
   * point of that string is converted back into a character.
   *
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
   *
   * @return string|null The character with the lowest code point than others, returns null on failure or empty input.
   */
  public static function min($arg)
  {
    $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($haystack, false);

    // nothing to compare
    if (\count($codepoints) === 0) {
      return null;
    }

    return self::chr(\min($codepoints));
  }
3976
3977
  /**
   * alias for "UTF8::normalize_encoding()"
   *
   * Both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param mixed $encoding <p>The encoding-name that should be normalized.</p>
   * @param mixed $fallback [optional] <p>Forwarded as fallback value.</p>
   *
   * @return mixed
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3993
3994
  /**
3995
   * Normalize the encoding-"name" input.
3996
   *
3997
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3998
   * @param mixed $fallback <p>e.g.: UTF-8</p>
3999
   *
4000
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
4001
   */
4002 341
  public static function normalize_encoding($encoding, $fallback = '')
4003
  {
4004 341
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];
4005
4006
    // init
4007 341
    $encoding = (string)$encoding;
4008
4009
    if (
4010 341
        !$encoding
4011
        ||
4012 50
        $encoding === '1' // only a fallback, for non "strict_types" usage ...
4013
        ||
4014 341
        $encoding === '0' // only a fallback, for non "strict_types" usage ...
4015
    ) {
4016 296
      return $fallback;
4017
    }
4018
4019
    if (
4020 49
        'UTF-8' === $encoding
4021
        ||
4022 49
        'UTF8' === $encoding
4023
    ) {
4024 22
      return 'UTF-8';
4025
    }
4026
4027
    if (
4028 42
        '8BIT' === $encoding
4029
        ||
4030 42
        'BINARY' === $encoding
4031
    ) {
4032
      return 'CP850';
4033
    }
4034
4035
    if (
4036 42
        'HTML' === $encoding
4037
        ||
4038 42
        'HTML-ENTITIES' === $encoding
4039
    ) {
4040 2
      return 'HTML-ENTITIES';
4041
    }
4042
4043 42
    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
4044 40
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
4045
    }
4046
4047 6
    if (self::$ENCODINGS === null) {
4048 1
      self::$ENCODINGS = self::getData('encodings');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('encodings') can also be of type false. However, the property $ENCODINGS is declared as type array|null. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
4049
    }
4050
4051 6
    if (\in_array($encoding, self::$ENCODINGS, true)) {
0 ignored issues
show
Bug introduced by
It seems like self::ENCODINGS can also be of type false; however, parameter $haystack of in_array() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

4051
    if (\in_array($encoding, /** @scrutinizer ignore-type */ self::$ENCODINGS, true)) {
Loading history...
4052 4
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;
4053
4054 4
      return $encoding;
4055
    }
4056
4057 5
    $encodingOrig = $encoding;
4058 5
    $encoding = \strtoupper($encoding);
4059 5
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);
4060
4061
    $equivalences = [
4062 5
        'ISO8859'     => 'ISO-8859-1',
4063
        'ISO88591'    => 'ISO-8859-1',
4064
        'ISO'         => 'ISO-8859-1',
4065
        'LATIN'       => 'ISO-8859-1',
4066
        'LATIN1'      => 'ISO-8859-1', // Western European
4067
        'ISO88592'    => 'ISO-8859-2',
4068
        'LATIN2'      => 'ISO-8859-2', // Central European
4069
        'ISO88593'    => 'ISO-8859-3',
4070
        'LATIN3'      => 'ISO-8859-3', // Southern European
4071
        'ISO88594'    => 'ISO-8859-4',
4072
        'LATIN4'      => 'ISO-8859-4', // Northern European
4073
        'ISO88595'    => 'ISO-8859-5',
4074
        'ISO88596'    => 'ISO-8859-6', // Greek
4075
        'ISO88597'    => 'ISO-8859-7',
4076
        'ISO88598'    => 'ISO-8859-8', // Hebrew
4077
        'ISO88599'    => 'ISO-8859-9',
4078
        'LATIN5'      => 'ISO-8859-9', // Turkish
4079
        'ISO885911'   => 'ISO-8859-11',
4080
        'TIS620'      => 'ISO-8859-11', // Thai
4081
        'ISO885910'   => 'ISO-8859-10',
4082
        'LATIN6'      => 'ISO-8859-10', // Nordic
4083
        'ISO885913'   => 'ISO-8859-13',
4084
        'LATIN7'      => 'ISO-8859-13', // Baltic
4085
        'ISO885914'   => 'ISO-8859-14',
4086
        'LATIN8'      => 'ISO-8859-14', // Celtic
4087
        'ISO885915'   => 'ISO-8859-15',
4088
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
4089
        'ISO885916'   => 'ISO-8859-16',
4090
        'LATIN10'     => 'ISO-8859-16', // Southeast European
4091
        'CP1250'      => 'WINDOWS-1250',
4092
        'WIN1250'     => 'WINDOWS-1250',
4093
        'WINDOWS1250' => 'WINDOWS-1250',
4094
        'CP1251'      => 'WINDOWS-1251',
4095
        'WIN1251'     => 'WINDOWS-1251',
4096
        'WINDOWS1251' => 'WINDOWS-1251',
4097
        'CP1252'      => 'WINDOWS-1252',
4098
        'WIN1252'     => 'WINDOWS-1252',
4099
        'WINDOWS1252' => 'WINDOWS-1252',
4100
        'CP1253'      => 'WINDOWS-1253',
4101
        'WIN1253'     => 'WINDOWS-1253',
4102
        'WINDOWS1253' => 'WINDOWS-1253',
4103
        'CP1254'      => 'WINDOWS-1254',
4104
        'WIN1254'     => 'WINDOWS-1254',
4105
        'WINDOWS1254' => 'WINDOWS-1254',
4106
        'CP1255'      => 'WINDOWS-1255',
4107
        'WIN1255'     => 'WINDOWS-1255',
4108
        'WINDOWS1255' => 'WINDOWS-1255',
4109
        'CP1256'      => 'WINDOWS-1256',
4110
        'WIN1256'     => 'WINDOWS-1256',
4111
        'WINDOWS1256' => 'WINDOWS-1256',
4112
        'CP1257'      => 'WINDOWS-1257',
4113
        'WIN1257'     => 'WINDOWS-1257',
4114
        'WINDOWS1257' => 'WINDOWS-1257',
4115
        'CP1258'      => 'WINDOWS-1258',
4116
        'WIN1258'     => 'WINDOWS-1258',
4117
        'WINDOWS1258' => 'WINDOWS-1258',
4118
        'UTF16'       => 'UTF-16',
4119
        'UTF32'       => 'UTF-32',
4120
        'UTF8'        => 'UTF-8',
4121
        'UTF'         => 'UTF-8',
4122
        'UTF7'        => 'UTF-7',
4123
        '8BIT'        => 'CP850',
4124
        'BINARY'      => 'CP850',
4125
    ];
4126
4127 5
    if (!empty($equivalences[$encodingUpperHelper])) {
4128 4
      $encoding = $equivalences[$encodingUpperHelper];
4129
    }
4130
4131 5
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;
4132
4133 5
    return $encoding;
4134
  }
4135
4136
  /**
   * Convert all line endings to the unix-like "\n".
   *
   * "\r\n" pairs are collapsed first, afterwards every remaining "\r" is replaced.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string with "\n" as the only line ending.</p>
   */
  public static function normalize_line_ending(string $str): string
  {
    // step 1: "\r\n" => "\n"
    $withoutCrLf = \str_replace("\r\n", "\n", $str);

    // step 2: every "\r" that is still left => "\n"
    return (string)\str_replace("\r", "\n", $withoutCrLf);
  }
4147
4148
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "utf8_msword" data table is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    // search- and replace-lists, filled on the first non-empty call
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ('' === $str) {
      return '';
    }

    if ($UTF8_MSWORD_KEYS_CACHE !== null) {
      return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
    }

    // lazy-load the mapping table
    if (self::$UTF8_MSWORD === null) {
      self::$UTF8_MSWORD = self::getData('utf8_msword');
    }

    $UTF8_MSWORD_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$UTF8_MSWORD);
    $UTF8_MSWORD_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$UTF8_MSWORD);

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
4176
4177
  /**
   * Normalize the whitespace.
   *
   * Every entry of the whitespace table is replaced by a plain space " "; unless requested otherwise,
   * the entries of the bidirectional-control table are removed beforehand.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    // one prepared search-list per "$keepNonBreakingSpace" variant (index 0 / 1)
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if ('' === $str) {
      return '';
    }

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // the bidi controls are dropped before the whitespace replacement runs
      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
4219
4220
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Lookup order: local result cache, the "ord" data table, "IntlChar::ord()" (if supported)
   * and finally a manual decoding of the first (max. 4) bytes.
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    $chr = (string)$chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // the cache key uses the untouched input, before any re-encoding below
    $cacheKey = $chr . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // hits in the data table are returned directly and are not copied into the local cache
    if (isset(self::$ORD[$chr])) {
      return self::$ORD[$chr];
    }

    // check again, if it's still not UTF-8
    if ($encoding !== 'UTF-8') {
      $chr = self::encode($encoding, $chr);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // manual fallback: look at the raw bytes (1-based array from "unpack()")
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $code = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $code && isset($bytes[4])) {
      // 4-byte sequence
      $code = (($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $code && isset($bytes[3])) {
      // 3-byte sequence
      $code = (($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $code && isset($bytes[2])) {
      // 2-byte sequence
      $code = (($code - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    $CHAR_CACHE[$cacheKey] = (int)$code;

    return $CHAR_CACHE[$cacheKey];
  }
4297
4298
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never (re-)places variables in the current scope;
   *          the result is always written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool
   *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // without "mbstring" only the native parser is available
    if (self::$SUPPORT['mbstring'] !== true) {
      /** @noinspection PhpVoidFunctionResultUsedInspection */
      \parse_str($str, $result);

      return !empty($result);
    }

    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
4334
4335
  /**
   * Checks if the "/u" modifier is available that enables Unicode support in PCRE.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // an empty pattern with the "u" modifier is run against an empty subject,
    // the error is silenced and only the result is looked at
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @\preg_match('//u', '');

    return (bool)$matched;
  }
4347
4348
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits, hexadecimal digits,
   * otherwise as a character whose code point is taken via "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[] <p>An empty array if one of the boundaries is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // resolve the start (index 0) and the end (index 1) into code points
    $codePoints = [];
    foreach ([$var1, $var2] as $boundary) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (\ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return [];
      }

      $codePoints[] = $codePoint;
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($codePoints[0], $codePoints[1])
    );
  }
4404
4405
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" sequences are rewritten into hexadecimal html entities first
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    // decode at least once; with "$multi_decode" repeat until the string is stable
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
4453
4454
  /**
   * Filter a list of strings and return the remaining values as a re-indexed list.
   *
   * @param array    $strings           <p>The strings to filter.</p>
   * @param bool     $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
   * @param int|null $removeShortValues <p>Drop values whose length is lower or equal to this value,
   *                                    null disables the check.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $reduced = [];

    foreach ($strings as $value) {
      // the length check comes first, the empty check is only reached by longer values
      $isTooShort = $removeShortValues !== null && self::strlen($value) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isEmpty = $removeEmptyValues === true && \trim($value) === '';
      if ($isEmpty) {
        continue;
      }

      $reduced[] = $value;
    }

    return $reduced;
  }
4488
4489
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * The pattern is always executed with the "u" modifier; the option string "msr" is treated as "ms".
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used.</p>
   * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    $modifiers = 'u' . ($options === 'msr' ? 'ms' : $options);

    // fallback
    $delimiter = $delimiter ?: '/';

    $regex = $delimiter . $pattern . $delimiter . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4519
4520
  /**
   * Alias of "UTF8::remove_bom()", the call is forwarded unchanged.
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4535
4536
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the BOM table is checked in turn against the current start of the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // only a BOM at byte-offset 0 counts
      if (0 !== self::strpos_in_byte($str, $bomString, 0)) {
        continue;
      }

      $withoutBom = self::substr_in_byte($str, $bomByteLength, $remainingBytes);
      if ($withoutBom === false) {
        return '';
      }

      $remainingBytes -= $bomByteLength;
      $str = (string)$withoutBom;
    }

    return $str;
  }
4565
4566
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    if (\is_string($what) === true) {
      $what = [$what];
    }

    // any other type than string / array is ignored and the input is returned unchanged
    if (\is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // "$1" re-inserts the captured literal: using $item itself as the replacement would let
        // sequences like "$0" or "\1" inside of it be interpreted as back-references
        $str = (string)\preg_replace('/(' . \preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4589
4590
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str
   * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which should
   *                              not be stripped. Default: '' (an empty string)
   *                              </p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
  }
4604
4605
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * Matches (case-insensitive) "<br>" tags in any notation, e.g. "<br>", "<br/>", "<br />" or
   * "<br clear="all">", plus the line breaks "\r\n", "\r" and "\n".
   *
   * @param string $str
   * @param string $replacement [optional] <p>Default is an empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // - no leading "/" in front of "\r\n": a slash before a line break belongs to the content
    // - "\b" keeps other tags that only start with "br" (e.g. "<brand>") untouched
    return (string)\preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
  }
4617
4618
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
   * The replacement is repeated until nothing matches anymore, so sequences that only appear after a
   * removal are removed as well.
   *
   * @param string $str
   * @param bool   $url_encoded <p>Also remove the url encoded ("%XX") form of the characters.</p>
   * @param string $replacement <p>The string that is inserted for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoded hex digits are case-insensitive, so "%0B" must be caught like "%0b"
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4651
4652
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // skip as many characters as the prefix is long and keep the rest
    $prefixLength = self::strlen($substring, $encoding);

    return (string)self::substr(
        $str,
        /** @scrutinizer ignore-type */ $prefixLength,
        null,
        $encoding
    );
  }
4675
4676
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    // number of leading characters that are kept
    $keepLength = self::strlen($str, $encoding) - self::strlen($substring, $encoding);

    // the lengths above are measured in $encoding, so the cut must use the same encoding
    // (same as in "UTF8::remove_left()")
    return (string)self::substr(
        $str,
        0,
        $keepLength,
        $encoding
    );
  }
4698
4699
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * Dispatches to "UTF8::str_replace()" or, for the case-insensitive mode, to "UTF8::str_ireplace()".
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4717
4718
  /**
   * Replaces all occurrences of the $search elements in $str by $replacement.
   *
   * Dispatches to "UTF8::str_replace()" or, for the case-insensitive mode, to "UTF8::str_ireplace()".
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4736
4737
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement, an empty string removes the chars.</p>
   * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 chars.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // "mb_substitute_character()" only accepts a code point or "none" / "long" / "entity",
      // never an arbitrary replacement string: so invalid sequences are first turned into
      // U+FFFD and the final "str_replace()" below swaps that for the requested replacement.
      //
      // always fallback via symfony polyfill
      $save = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    return \str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacementChar,
            $replacementChar,
        ],
        $str
    );
  }
4793
4794
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without $chars all trailing chars of the Unicode categories "Z" (separator) and "C" (other / control)
   * are stripped.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars <p>Optional characters to be stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "0" is a valid character to strip, so it must not count as "no chars given"
    $useDefaultChars = $chars === INF || (!$chars && $chars !== '0' && $chars !== 0);

    // "\z" only matches at the very end of the string, while "$" would also match
    // in front of a trailing newline and so strip chars that are not at the end
    //
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = '[\pZ\pC]+\z';
    } else {
      $pattern = '[' . \preg_quote((string)$chars, '/') . ']+\z';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4818
4819
  /**
   * Build a regex fragment that matches any of the chars from $s.
   *
   * Short chars are collected into one "[...]" character class (prefixed by $class); chars that can't
   * be put into the class are appended as alternatives, e.g.: "(?:[...]|foo)".
   *
   * @param string $s     <p>The chars to match.</p>
   * @param string $class <p>Initial content of the character class.</p>
   *
   * @return string
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    $charClass = $class;
    $alternatives = [];

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a dash is moved to the front of the class
        $charClass = '-' . $charClass;
      } elseif (!isset($char[2])) {
        // at most two bytes long
        $charClass .= \preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $charClass .= $char;
      } else {
        $alternatives[] = $char;
      }
    }

    if ($charClass) {
      $charClass = '[' . $charClass . ']';
    }

    if ($alternatives === []) {
      $return = $charClass;
    } else {
      $return = '(?:' . $charClass . '|' . \implode('|', $alternatives) . ')';
    }

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4867
4868
  /**
   * WARNING: Prints the collected support information (self::$SUPPORT) as HTML, e.g. for debugging.
   *
   * Every entry is rendered as "key - value" inside a "<pre>" block.
   */
  public static function showSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // collect the whole report first, then print it in one go
    $report = '<pre>';
    foreach (self::$SUPPORT as $feature => $state) {
      $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }
    $report .= '</pre>';

    echo $report;
  }
4883
4884
  /**
   * Converts a UTF-8 character to a HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity, the unchanged input for kept ASCII chars,
   *                or an empty string for empty input.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ($char === '') {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // only encodings other than the two well-known ones are normalized
    $isWellKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isWellKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4913
4914
  /**
   * Replace each (non-overlapping) occurrence of $tabLength consecutive spaces with one tab character.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that are replaced by one tab. Default: 4</p>
   *
   * @return string The converted string; $str is returned unchanged for a non-positive $tabLength.
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    // str_repeat() does not accept a negative multiplier, and a zero-length
    // search string can never match: there is nothing to replace in both cases.
    if ($tabLength <= 0) {
      return $str;
    }

    return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
  }
4924
4925
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string|int|string[]|int[] $str       <p>The string to split into array. For an array, every
   *                                             element is split on its own.</p>
   * @param int                       $length    [optional] <p>Max character length of each array element.</p>
   * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string (an array of such arrays for array input);
   *                  an empty array for empty input or a non-positive $length.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    if ($length <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        // forward $cleanUtf8 as well, so nested values are handled like a plain string
        $str[$k] = self::split($v, $length, $cleanUtf8);
      }

      return $str;
    }

    // init
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      \preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: walk the bytes and group them by the bit pattern of the lead byte;
      // a lead byte without the expected continuation byte(s) is skipped

      $len = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // 0xxxxxxx: single byte
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 110xxxxx: two byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 1110xxxx: three byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 11110xxx: four byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue $length characters together per chunk
      $ret = \array_chunk($ret, $length);

      return \array_map(
          function ($item) {
            return \implode('', $item);
          }, $ret
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return [];
    }

    return $ret;
  }
5057
5058
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string      $str                   <p>The input string.</p>
   * @param string      $encoding              [optional] <p>Default: UTF-8</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // shared upper-casing step for both callbacks below
    $toUpper = function ($text) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength) {
      return UTF8::strtoupper($text, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    };

    $camelized = self::lcfirst(self::trim($str), $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    // leading dashes / underscores are dropped
    $camelized = (string)\preg_replace('/^[-_]+/', '', $camelized);

    // remove each run of separators and upper-case the character that follows it (if any)
    $camelized = (string)\preg_replace_callback(
        '/[-_\s]+(.)?/u',
        function ($match) use ($toUpper) {
          return isset($match[1]) ? $toUpper($match[1]) : '';
        },
        $camelized
    );

    // upper-case each run of digits together with the character that follows it
    $camelized = (string)\preg_replace_callback(
        '/[\d]+(.)?/u',
        function ($match) use ($toUpper) {
          return $toUpper($match[0]);
        },
        $camelized
    );

    return $camelized;
  }
5098
5099
  /**
   * Returns the string with the first letter of each word capitalized,
   * except for when the word is a name which shouldn't be capitalized.
   *
   * Whitespace is collapsed first; afterwards the parts are processed twice,
   * once split by spaces and once split by dashes.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $collapsed = self::collapse_whitespace($str);

    $bySpaces = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($bySpaces, '-');
  }
5116
5117
  /**
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
   *
   * Splits $names by $delimiter and upper-cases the first character of every part, except for parts
   * that are listed as special names or that start with one of the listed prefixes.
   *
   * @param string $names     <p>The name(s) to process.</p>
   * @param string $delimiter <p>The separator between the name parts, e.g. " " or "-".</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The re-joined name parts; an empty string for an empty $delimiter.
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // explode() cannot split by an empty delimiter
    if ($delimiter === '') {
      return '';
    }

    // init
    $namesArray = \explode($delimiter, $names);

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // Beginnings that keep a part untouched: always the prefixes and, for
    // dash-separated parts, the special names as well.
    $beginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $beginnings = \array_merge($specialCases['names'], $beginnings);
    }

    foreach ($namesArray as $index => $name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      $keepAsIs = false;
      foreach ($beginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          $keepAsIs = true;

          // one match is enough
          break;
        }
      }

      if ($keepAsIs) {
        continue;
      }

      $namesArray[$index] = self::str_upper_first($name);
    }

    return \implode($delimiter, $namesArray);
  }
5205
5206
  /**
   * Returns true if the string contains $needle, false otherwise. By default
   * the comparison is case-sensitive, but can be made insensitive by setting
   * $caseSensitive to false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle; always false for an empty $haystack or $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    // (a non-boolean third argument is taken as the encoding)
    if (\is_bool($caseSensitive) === false) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
5235
5236
  /**
   * Returns true if the string contains all $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains every needle; false for an empty $haystack
   *              or an empty $needles list.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needles === []) {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    // (a non-boolean third argument is taken as the encoding)
    if (\is_bool($caseSensitive) === false) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);

      // the first missing needle decides the result
      if ($found === false) {
        return false;
      }
    }

    return true;
  }
5271
5272
  /**
   * Returns true if the string contains any $needles, false otherwise. By
   * default the comparison is case-sensitive, but can be made insensitive by
   * setting $caseSensitive to false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains at least one of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($needles === []) {
      return false;
    }

    foreach ($needles as $needle) {
      $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);

      // the first hit decides the result
      if ($found === true) {
        return true;
      }
    }

    return false;
  }
5299
5300
  /**
   * Returns a lowercase and trimmed string separated by dashes. Dashes are
   * inserted before uppercase characters (with the exception of the first
   * character of the string), and in place of spaces as well as underscores.
   *
   * Shortcut for self::str_delimit() with "-" as delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
  }
5314
5315
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces, dashes,
   * and underscores. Alpha delimiters are not converted to lowercase.
   *
   * @param string      $str                           <p>The input string.</p>
   * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
   * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8                     [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
   *                                                   tr</p>
   * @param bool        $tryToKeepStringLength         [optional] <p>true === try to keep the string length: e.g. ẞ ->
   *                                                   ß</p>
   *
   * @return string
   */
  public static function str_delimit(
      string $str,
      string $delimiter,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false,
      string $lang = null,
      bool $tryToKeepStringLength = false
  ): string
  {
    $str = self::trim($str);

    // Put a provisional "-" in front of every upper-case letter that is not at a word boundary;
    // it is replaced by $delimiter below, together with the other separators.
    // "\p{Lu}" (instead of "[A-Z]") also covers non-ASCII upper-case letters like "Ä" or "Σ".
    $str = (string)\preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    // lower-case before the delimiter is inserted, so the delimiter itself keeps its case
    $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return (string)\preg_replace('/[-_\s]+/u', $delimiter, $str);
  }
5349
5350
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
   * "mb_detect_encoding()" and finally a round trip via "iconv()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      $isUtf16 = self::is_utf16($str, false);
      if ($isUtf16 === 1) {
        return 'UTF-16LE';
      }
      if ($isUtf16 === 2) {
        return 'UTF-16BE';
      }

      $isUtf32 = self::is_utf32($str, false);
      if ($isUtf32 === 1) {
        return 'UTF-32LE';
      }
      if ($isUtf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $encoding = \mb_detect_encoding($str, $detectOrder, true);
      if ($encoding) {
        return $encoding;
      }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // getData() may return false; keep the property an array in that case
      $encodingList = self::getData('encodings');
      self::$ENCODINGS = \is_array($encodingList) ? $encodingList : [];
    }

    // never iterate over a non-array value
    $encodingCandidates = \is_array(self::$ENCODINGS) ? self::$ENCODINGS : [];

    foreach ($encodingCandidates as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      // the string counts as valid for an encoding if converting it to itself does not change it
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5476
5477
  /**
   * Check if the string ends with the given substring.
   *
   * The comparison is done on the raw bytes and is case-sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool Always false if $haystack or $needle is empty.
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the last strlen($needle) bytes of the haystack
    $tail = \substr($haystack, -\strlen($needle));

    return $tail === $needle;
  }
5493
5494
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if ($substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);

      // the first matching suffix decides the result
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5518
5519
  /**
   * Ensures that the string begins with $substring. If it doesn't, it's
   * prepended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    $alreadyPrefixed = self::str_starts_with($str, $substring);

    return $alreadyPrefixed ? $str : $substring . $str;
  }
5536
5537
  /**
   * Ensures that the string ends with $substring. If it doesn't, it's appended.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    $alreadySuffixed = self::str_ends_with($str, $substring);

    return $alreadySuffixed ? $str : $str . $substring;
  }
5553
5554
  /**
   * Capitalizes the first word of the string, replaces underscores with
   * spaces, and strips '_id'.
   *
   * Every occurrence of '_id' is removed before the remaining underscores are replaced.
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // order matters: '_id' has to be handled before the plain '_'
    $search = ['_id', '_'];
    $replacement = ['', ' '];

    $humanized = self::str_replace($search, $replacement, $str);

    return self::ucfirst(self::trim($humanized));
  }
5578
5579
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * The tail of $haystack is cut by the byte length of $needle and then
   * compared via self::strcasecmp().
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool Always false if $haystack or $needle is empty.
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the last strlen($needle) bytes of the haystack
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
5599
5600
  /**
   * Returns true if the string ends with any of $substrings, false otherwise.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if ($substrings === []) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);

      // the first matching suffix decides the result
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5624
5625
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * Thin wrapper around self::stripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
  }
5647
5648
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * Thin wrapper around self::strripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
  }
5671
5672
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * Thin wrapper around self::strpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
  }
5694
5695
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * Thin wrapper around self::strrpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
  }
5718
5719
  /**
   * Inserts $substring into the string at the $index provided.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The string with $substring inserted; the unchanged $str if $index is greater than
   *                the length of $str or if that length cannot be determined.
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $len = self::strlen($str, $encoding);

    // strlen() can yield false: without a length there is no reliable position to insert at,
    // and false must not be passed on as the length for substr()
    if ($len === false || $index > $len) {
      return $str;
    }

    // substr() can yield false as well, which is treated as an empty part
    $start = (string)self::substr($str, 0, $index, $encoding);
    $end = (string)self::substr($str, $index, $len, $encoding);

    return $start . $substring . $end;
  }
5742
5743
  /**
5744
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
5745
   *
5746
   * @link  http://php.net/manual/en/function.str-ireplace.php
5747
   *
5748
   * @param mixed $search  <p>
5749
   *                       Every replacement with search array is
5750
   *                       performed on the result of previous replacement.
5751
   *                       </p>
5752
   * @param mixed $replace <p>
5753
   *                       </p>
5754
   * @param mixed $subject <p>
5755
   *                       If subject is an array, then the search and
5756
   *                       replace is performed with every entry of
5757
   *                       subject, and the return value is an array as
5758
   *                       well.
5759
   *                       </p>
5760
   * @param int   $count   [optional] <p>
5761
   *                       The number of matched and replaced needles will
5762
   *                       be returned in count which is passed by
5763
   *                       reference.
5764
   *                       </p>
5765
   *
5766
   * @return mixed A string or an array of replacements.
5767
   */
5768 41
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Build one case-insensitive, UTF-8 aware regex per search term.
    // (No by-reference loop variable, so no dangling reference is left behind.)
    $patterns = [];
    foreach ((array)$search as $term) {
      $term = (string)$term;

      if ('' === $term) {
        // An empty needle must never match anything; this pattern cannot match.
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . \preg_quote($term, '/') . '/ui';
      }
    }

    // preg_replace() expands "$1" / "\1" style references inside the replacement.
    // This method promises plain-text replacement, so "\" and "$" are escaped
    // to keep every replacement literal.
    if (\is_array($replace)) {
      $replacement = [];
      foreach ($replace as $key => $value) {
        $replacement[$key] = \addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = \addcslashes((string)$replace, '\\$');
    }

    // The number of replacements is written straight into the by-reference $count.
    return \preg_replace($patterns, $replacement, $subject, -1, $count);
  }
5787
5788
  /**
5789
   * Check if the string starts with the given substring, case insensitive.
5790
   *
5791
   * @param string $haystack <p>The string to search in.</p>
5792
   * @param string $needle   <p>The substring to search for.</p>
5793
   *
5794
   * @return bool
5795
   */
5796 12
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    // Empty operands never count as a match.
    $hasEmptyOperand = ($haystack === '' || $needle === '');

    // A prefix match means the case-insensitive search hits at position 0.
    return !$hasEmptyOperand && self::stripos($haystack, $needle) === 0;
  }
5808
5809
  /**
5810
   * Returns true if the string begins with any of $substrings, false otherwise.
5811
   *
5812
   * - case-insensitive
5813
   *
5814
   * @param string $str        <p>The input string.</p>
5815
   * @param array  $substrings <p>Substrings to look for.</p>
5816
   *
5817
   * @return bool Whether or not $str starts with $substring.
5818
   */
5819 4
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    // An empty input string or an empty candidate list can never produce a match.
    if ($str === '' || $substrings === []) {
      return false;
    }

    // Stop at the first candidate that is a (case-insensitive) prefix of $str.
    $matched = false;
    foreach ($substrings as $candidate) {
      if (self::str_istarts_with($str, $candidate)) {
        $matched = true;
        break;
      }
    }

    return $matched;
  }
5837
5838
  /**
5839
   * Gets the substring after the first occurrence of a separator.
5840
   *
5841
   * @param string $str       <p>The input string.</p>
5842
   * @param string $separator <p>The string separator.</p>
5843
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
5844
   *
5845
   * @return string
5846
   */
5847 1
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    // Locate the first separator (presumably case-insensitively, per the "i" naming).
    // NOTE(review): $encoding is not forwarded to str_iindex_first() — confirm this is intended.
    $separatorPos = self::str_iindex_first($str, $separator);
    if (false === $separatorPos) {
      return '';
    }

    // Skip past the separator itself and take everything up to the end.
    $tailStart = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $tailStart, null, $encoding);
  }
5869
5870
  /**
5871
   * Gets the substring after the last occurrence of a separator.
5872
   *
5873
   * @param string $str       <p>The input string.</p>
5874
   * @param string $separator <p>The string separator.</p>
5875
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
5876
   *
5877
   * @return string
5878
   */
5879 1
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    // Locate the last separator (presumably case-insensitively, per the "i" naming).
    // NOTE(review): $encoding is not forwarded to str_iindex_last() — confirm this is intended.
    $separatorPos = self::str_iindex_last($str, $separator);
    if (false === $separatorPos) {
      return '';
    }

    // Skip past the separator itself and take everything up to the end.
    $tailStart = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $tailStart, null, $encoding);
  }
5901
5902
  /**
5903
   * Gets the substring before the first occurrence of a separator.
5904
   *
5905
   * @param string $str       <p>The input string.</p>
5906
   * @param string $separator <p>The string separator.</p>
5907
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
5908
   *
5909
   * @return string
5910
   */
5911 1
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    // Locate the first separator (presumably case-insensitively, per the "i" naming).
    // NOTE(review): $encoding is not forwarded to str_iindex_first() — confirm this is intended.
    $separatorPos = self::str_iindex_first($str, $separator);

    // Without a separator there is no "before" part; otherwise keep the head up to it.
    return (false === $separatorPos)
        ? ''
        : (string)self::substr($str, 0, $separatorPos, $encoding);
  }
5928
5929
  /**
5930
   * Gets the substring before the last occurrence of a separator.
5931
   *
5932
   * @param string $str       <p>The input string.</p>
5933
   * @param string $separator <p>The string separator.</p>
5934
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
5935
   *
5936
   * @return string
5937
   */
5938 1
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $separator) {
      return '';
    }

    // Locate the last separator (presumably case-insensitively, per the "i" naming).
    // NOTE(review): $encoding is not forwarded to str_iindex_last() — confirm this is intended.
    $separatorPos = self::str_iindex_last($str, $separator);

    // Without a separator there is no "before" part; otherwise keep the head up to it.
    return (false === $separatorPos)
        ? ''
        : (string)self::substr($str, 0, $separatorPos, $encoding);
  }
5955
5956
  /**
5957
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
5958
   *
5959
   * @param string $str          <p>The input string.</p>
5960
   * @param string $needle       <p>The string to look for.</p>
5961
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
5962
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
5963
   *
5964
   * @return string
5965
   */
5966 2
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // Nothing to search for, or nothing to search in.
    if ($str === '' || $needle === '') {
      return '';
    }

    // self::stristr() does the actual work; a miss is reported as false.
    $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return ($found === false) ? '' : $found;
  }
5988
5989
  /**
5990
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
5991
   *
5992
   * @param string $str          <p>The input string.</p>
5993
   * @param string $needle       <p>The string to look for.</p>
5994
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
5995
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
5996
   *
5997
   * @return string
5998
   */
5999 1
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // Nothing to search for, or nothing to search in.
    if ($str === '' || $needle === '') {
      return '';
    }

    // self::strrichr() does the actual work; a miss is reported as false.
    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return ($found === false) ? '' : $found;
  }
6016
6017
  /**
6018
   * Returns the last $n characters of the string.
6019
   *
6020
   * @param string $str      <p>The input string.</p>
6021
   * @param int    $n        <p>Number of characters to retrieve from the end.</p>
6022
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6023
   *
6024
   * @return string
6025
   */
6026 12
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    // Asking for fewer than one character yields nothing.
    if ($n < 1) {
      return '';
    }

    // A negative start makes self::substr() count from the end of the string.
    $tail = self::substr($str, -$n, null, $encoding);
    if (false === $tail) {
      return '';
    }

    return $tail;
  }
6036
6037
  /**
6038
   * Limit the number of characters in a string.
6039
   *
6040
   * @param string $str      <p>The input string.</p>
6041
   * @param int    $length   [optional] <p>Default: 100</p>
6042
   * @param string $strAddOn [optional] <p>Default: …</p>
6043
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6044
   *
6045
   * @return string
6046
   */
6047 2
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    // Short enough already: return the input untouched (no add-on).
    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Reserve room for the add-on, measured in the same encoding as $str.
    // The value is clamped at zero: with substr-style functions a negative length
    // is taken relative to the end of the string, which would keep almost the
    // whole input and return something longer than $length.
    $keep = \max(0, $length - (int)self::strlen($strAddOn, $encoding));

    // The cast turns a possible false from self::substr() into ''.
    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
6063
6064
  /**
6065
   * Limit the number of characters in a string, but also after the next word.
6066
   *
6067
   * @param string $str      <p>The input string.</p>
6068
   * @param int    $length   [optional] <p>Default: 100</p>
6069
   * @param string $strAddOn [optional] <p>Default: …</p>
6070
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6071
   *
6072
   * @return string
6073
   */
6074 6
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $length <= 0) {
      return '';
    }

    // Short enough already: return the input untouched (no add-on).
    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // The last character inside the limit is a space: cut right before it.
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
      return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    // Otherwise truncate to the limit and drop the trailing (partial) word,
    // i.e. everything from the last space onwards.
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $lastSpace = \strrpos($truncated, ' ');
    $withoutLastWord = ($lastSpace === false) ? '' : (string)\substr($truncated, 0, $lastSpace);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // No earlier word left to fall back to: hard-cut one character short of the limit.
    return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
  }
6105
6106
  /**
6107
   * Returns the longest common prefix between the string and $otherStr.
6108
   *
6109
   * @param string $str      <p>The input string.</p>
6110
   * @param string $otherStr <p>Second string for comparison.</p>
6111
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6112
   *
6113
   * @return string
6114
   */
6115 10
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // The prefix cannot be longer than the shorter of the two strings.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $prefix = '';
    $pos = 0;
    while ($pos < $limit) {
      $current = self::substr($str, $pos, 1, $encoding);

      // First differing character ends the common prefix.
      if ($current != self::substr($otherStr, $pos, 1, $encoding)) {
        break;
      }

      $prefix .= $current;
      ++$pos;
    }

    return $prefix;
  }
6132
6133
  /**
6134
   * Returns the longest common substring between the string and $otherStr.
6135
   * In the case of ties, it returns that which occurs first.
6136
   *
6137
   * @param string $str
6138
   * @param string $otherStr <p>Second string for comparison.</p>
6139
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6140
   *
6141
   * @return string The longest common substring of $str and $otherStr.
6142
   */
6143 11
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Dynamic-programming solution, see
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty (or its length could not be determined).
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Split both inputs into characters once, instead of calling self::substr()
    // twice for every cell of the DP table.
    $strChars = [];
    for ($i = 0; $i < $strLength; $i++) {
      $strChars[] = self::substr($str, $i, 1, $encoding);
    }

    $otherChars = [];
    for ($j = 0; $j < $otherLength; $j++) {
      $otherChars[] = self::substr($otherStr, $j, 1, $encoding);
    }

    $len = 0; // length of the best common run found so far
    $end = 0; // 1-based index in $str just past that run

    // Only the previous row is ever read, so two rows replace the full table.
    $previousRow = \array_fill(0, $otherLength + 1, 0);

    for ($i = 1; $i <= $strLength; $i++) {
      $strChar = $strChars[$i - 1];
      $currentRow = [0];

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar === $otherChars[$j - 1]) {
          // Extend the common run ending at the previous character pair.
          $run = $previousRow[$j - 1] + 1;
          $currentRow[$j] = $run;

          // Strict ">" keeps the first occurrence when several runs tie.
          if ($run > $len) {
            $len = $run;
            $end = $i;
          }
        } else {
          $currentRow[$j] = 0;
        }
      }

      $previousRow = $currentRow;
    }

    // The cast turns a possible false from self::substr() into ''.
    return (string)self::substr($str, $end - $len, $len, $encoding);
  }
6184
6185
  /**
6186
   * Returns the longest common suffix between the string and $otherStr.
6187
   *
6188
   * @param string $str
6189
   * @param string $otherStr <p>Second string for comparison.</p>
6190
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6191
   *
6192
   * @return string
6193
   */
6194 10
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // The suffix cannot be longer than the shorter of the two strings.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $suffix = '';
    $fromEnd = 1;
    while ($fromEnd <= $limit) {
      // Negative start: the $fromEnd-th character counted from the end.
      $current = self::substr($str, -$fromEnd, 1, $encoding);

      // First differing character ends the common suffix.
      if ($current != self::substr($otherStr, -$fromEnd, 1, $encoding)) {
        break;
      }

      // Walking backwards, so each match is prepended.
      $suffix = $current . $suffix;
      ++$fromEnd;
    }

    return $suffix;
  }
6211
6212
  /**
6213
   * Returns true if $str matches the supplied pattern, false otherwise.
6214
   *
6215
   * @param string $str     <p>The input string.</p>
6216
   * @param string $pattern <p>Regex pattern to match against.</p>
6217
   *
6218
   * @return bool Whether or not $str matches the pattern.
6219
   */
6220 126
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // $pattern is inserted between "/" delimiters as-is and run in UTF-8 mode.
    // preg_match() yields 1 on a match; 0 (no match) and false (error) both mean "no".
    $regex = '/' . $pattern . '/u';

    return \preg_match($regex, $str) === 1;
  }
6228
6229
  /**
6230
   * Returns whether or not a character exists at an index. Offsets may be
6231
   * negative to count from the last character in the string. Implements
6232
   * part of the ArrayAccess interface.
6233
   *
6234
   * @param string $str      <p>The input string.</p>
6235
   * @param int    $offset   <p>The index to check.</p>
6236
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6237
   *
6238
   *
6239
   * @return bool Whether or not the index exists.
6240
   */
6241 6
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $length = self::strlen($str, $encoding);

    // Non-negative offsets must lie strictly below the length; negative offsets
    // count from the end, so their magnitude may be at most the length.
    return ($offset >= 0)
        ? ($length > $offset)
        : ($length >= \abs($offset));
  }
6252
6253
  /**
6254
   * Returns the character at the given index. Offsets may be negative to
6255
   * count from the last character in the string. Implements part of the
6256
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
6257
   * does not exist.
6258
   *
6259
   * @param string $str      <p>The input string.</p>
6260
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
6261
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6262
   *
6263
   * @return string The character at the specified index.
6264
   *
6265
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
6266
   */
6267 2
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Measure the string in the requested encoding, so the bounds check agrees
    // with the char_at() lookup below (and with str_offset_exists()).
    $length = self::strlen($str, $encoding);

    // Non-negative indexes must lie below the length; negative indexes count
    // from the end, so their magnitude may be at most the length.
    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
6282
6283
  /**
6284
   * Pad a UTF-8 string to given length with another string.
6285
   *
6286
   * @param string $str        <p>The input string.</p>
6287
   * @param int    $pad_length <p>The length of return string.</p>
6288
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
6289
   * @param int    $pad_type   [optional] <p>
6290
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
6291
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
6292
   *                           </p>
6293
   * @param string $encoding   [optional] <p>Default: UTF-8</p>
6294
   *
6295
   * @return string Returns the padded string.
6296
   */
6297 41
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    // An empty input is returned as-is; it is never padded.
    if ('' === $str) {
      return '';
    }

    // Besides the STR_PAD_* integers, the aliases 'left', 'right' and 'both' are accepted.
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = (int)self::strlen($str, $encoding);

    // Nothing to do when the target length is non-positive or already exceeded.
    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    // An empty pad string cannot fill anything, and dividing by its length
    // below would be a division by zero, so the input is returned unchanged.
    $ps_length = (int)self::strlen($pad_string, $encoding);
    if ($ps_length === 0) {
      return $str;
    }

    // Number of characters that still have to be added.
    $diff = ($pad_length - $str_length);

    // Each side is built by repeating the pad string often enough and then
    // trimming it to the exact number of characters needed.
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff, $encoding);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // The left side gets the smaller half, the right side the larger one.
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff, $encoding);
        $pre = '';
    }

    return $pre . $str . $post;
  }
6354
6355
  /**
6356
   * Returns a new string of a given length such that both sides of the
6357
   * string are padded. Alias for pad() with a $padType of 'both'.
6358
   *
6359
   * @param string $str
6360
   * @param int    $length   <p>Desired string length after padding.</p>
6361
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6362
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6363
   *
6364
   * @return string String with padding applied.
6365
   */
6366 11
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Characters missing to reach the requested length.
    $missing = $length - self::strlen($str, $encoding);

    // Split the gap: the left side gets the smaller half, the right side the larger one.
    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
6372
6373
  /**
6374
   * Returns a new string of a given length such that the beginning of the
6375
   * string is padded. Alias for pad() with a $padType of 'left'.
6376
   *
6377
   * @param string $str
6378
   * @param int    $length   <p>Desired string length after padding.</p>
6379
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6380
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6381
   *
6382
   * @return string String with left padding.
6383
   */
6384 7
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure $str in the requested encoding (as str_pad_both() does), so the
    // padding width matches the encoding that apply_padding() is given.
    $missing = $length - (int)self::strlen($str, $encoding);

    // All missing characters go to the left side.
    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
6388
6389
  /**
6390
   * Returns a new string of a given length such that the end of the string
6391
   * is padded. Alias for pad() with a $padType of 'right'.
6392
   *
6393
   * @param string $str
6394
   * @param int    $length   <p>Desired string length after padding.</p>
6395
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6396
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6397
   *
6398
   * @return string String with right padding.
6399
   */
6400 7
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure $str in the requested encoding (as str_pad_both() does), so the
    // padding width matches the encoding that apply_padding() is given.
    $missing = $length - (int)self::strlen($str, $encoding);

    // All missing characters go to the right side.
    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
6404
6405
  /**
6406
   * Repeat a string.
6407
   *
6408
   * @param string $str        <p>
6409
   *                           The string to be repeated.
6410
   *                           </p>
6411
   * @param int    $multiplier <p>
6412
   *                           Number of time the input string should be
6413
   *                           repeated.
6414
   *                           </p>
6415
   *                           <p>
6416
   *                           multiplier has to be greater than or equal to 0.
6417
   *                           If the multiplier is set to 0, the function
6418
   *                           will return an empty string.
6419
   *                           </p>
6420
   *
6421
   * @return string The repeated string.
6422
   */
6423 9
  public static function str_repeat(string $str, int $multiplier): string
  {
    // The input is passed through self::filter() first; the repetition itself
    // is done by PHP's native str_repeat().
    return \str_repeat(self::filter($str), $multiplier);
  }
6429
6430
  /**
6431
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
6432
   *
6433
   * Replace all occurrences of the search string with the replacement string
6434
   *
6435
   * @link http://php.net/manual/en/function.str-replace.php
6436
   *
6437
   * @param mixed $search  <p>
6438
   *                       The value being searched for, otherwise known as the needle.
6439
   *                       An array may be used to designate multiple needles.
6440
   *                       </p>
6441
   * @param mixed $replace <p>
6442
   *                       The replacement value that replaces found search
6443
   *                       values. An array may be used to designate multiple replacements.
6444
   *                       </p>
6445
   * @param mixed $subject <p>
6446
   *                       The string or array being searched and replaced on,
6447
   *                       otherwise known as the haystack.
6448
   *                       </p>
6449
   *                       <p>
6450
   *                       If subject is an array, then the search and
6451
   *                       replace is performed with every entry of
6452
   *                       subject, and the return value is an array as
6453
   *                       well.
6454
   *                       </p>
6455
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
6456
   *
6457
   * @return mixed This function returns a string or an array with the replaced values.
6458
   */
6459 60
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    // Plain pass-through to PHP's native str_replace(); $count is filled by reference.
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
6463
6464
  /**
6465
   * Replaces $search from the beginning of string with $replacement.
6466
   *
6467
   * @param string $str         <p>The input string.</p>
6468
   * @param string $search      <p>The string to search for.</p>
6469
   * @param string $replacement <p>The replacement.</p>
6470
   *
6471
   * @return string String after the replacements.
6472
   */
6473 17
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    // With an empty search term the replacement is simply appended to the input.
    if ($search === '') {
      return $str . $replacement;
    }

    // Byte-wise prefix check: swap the prefix for the replacement when it matches.
    $searchLength = \strlen($search);
    if (\strncmp($str, $search, $searchLength) === 0) {
      return $replacement . \substr($str, $searchLength);
    }

    return $str;
  }
6495
6496
  /**
6497
   * Replaces $search from the beginning of string with $replacement (case-insensitive).
6498
   *
6499
   * @param string $str         <p>The input string.</p>
6500
   * @param string $search      <p>The string to search for.</p>
6501
   * @param string $replacement <p>The replacement.</p>
6502
   *
6503
   * @return string String after the replacements.
6504
   */
6505 17
  public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
  {
    // With an empty search term the replacement is simply appended to the input.
    if ($search === '') {
      return $str . $replacement;
    }

    // Case-insensitive prefix check via native stripos(), which works on bytes.
    $startsWithSearch = (\stripos($str, $search) === 0);
    if (!$startsWithSearch) {
      return $str;
    }

    // Drop as many bytes as the search term has and put the replacement in front.
    return $replacement . \substr($str, \strlen($search));
  }
6527
6528
  /**
6529
   * Replaces $search from the ending of string with $replacement.
6530
   *
6531
   * @param string $str         <p>The input string.</p>
6532
   * @param string $search      <p>The string to search for.</p>
6533
   * @param string $replacement <p>The replacement.</p>
6534
   *
6535
   * @return string String after the replacements.
6536
   */
6537 17
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    // With an empty search term the replacement is simply appended to the input.
    if ('' === $search) {
      return $str . $replacement;
    }

    $searchLength = \strlen($search);

    // Check the lengths first: a search term longer than the input can never be
    // its suffix, and using it as an offset would point outside the string
    // (a warning on PHP 7, a ValueError on PHP 8).
    if (
        $searchLength <= \strlen($str)
        &&
        \substr_compare($str, $search, -$searchLength) === 0
    ) {
      // Byte-wise suffix match: cut the suffix off and append the replacement.
      return (string)\substr($str, 0, -$searchLength) . $replacement;
    }

    return $str;
  }
6559
6560
  /**
6561
   * Replaces $search from the ending of string with $replacement (case-insensitive).
6562
   *
6563
   * @param string $str         <p>The input string.</p>
6564
   * @param string $search      <p>The string to search for.</p>
6565
   * @param string $replacement <p>The replacement.</p>
6566
   *
6567
   * @return string String after the replacements.
6568
   */
6569 17
  public static function str_ireplace_ending(string $str, string $search, string $replacement): string
  {
    // With an empty search term the replacement is simply appended to the input.
    if ('' === $search) {
      return $str . $replacement;
    }

    $searchLength = \strlen($search);

    // Check the lengths first: a search term longer than the input can never be
    // its suffix, and using it as an offset would point outside the string
    // (a warning on PHP 7, a ValueError on PHP 8).
    if (
        $searchLength <= \strlen($str)
        &&
        \substr_compare($str, $search, -$searchLength, $searchLength, true) === 0
    ) {
      // Case-insensitive, byte-wise suffix match: cut it off and append the replacement.
      return (string)\substr($str, 0, -$searchLength) . $replacement;
    }

    return $str;
  }
6591
6592
  /**
   * Replace only the first occurrence of "$search" in "$subject" with "$replace".
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string The subject with the first match replaced, or the unchanged subject if nothing matched.
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $firstOffset = self::strpos($subject, $search);

    // nothing found -> nothing to replace
    if ($firstOffset === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstOffset, $searchLength);
  }
6610
6611
  /**
   * Replace only the last occurrence of "$search" in "$subject" with "$replace".
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string The subject with the last match replaced, or the unchanged subject if nothing matched.
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $lastOffset = self::strrpos($subject, $search);

    // nothing found -> nothing to replace
    if ($lastOffset === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $lastOffset, $searchLength);
  }
6629
6630
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string (an empty string for empty input).
   */
  public static function str_shuffle(string $str): string
  {
    // Without this guard "range(0, -1)" would produce the indexes [0, -1]
    // and the result would depend on how substr() treats an empty string.
    if ('' === $str) {
      return '';
    }

    $indexes = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // rebuild the string character by character in the shuffled order
    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6652
6653
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. If $end is negative, it is computed from the end
   * of the string.
   *
   * @param string $str
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
   *                     characters long, <b>FALSE</b> will be returned.
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    if ($end === null) {
      // no end given: take everything from $start onwards
      $length = (int)self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
      // the end lies at or before the start: nothing to extract
      return '';
    } elseif ($end < 0) {
      // a negative end is counted from the end of the string; the length must be
      // measured in the same encoding that substr() uses below
      $length = (int)self::strlen($str, $encoding) + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6682
6683
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Dashes and whitespace become "_", every upper-case ASCII letter is lower-cased
   * and prefixed with "_", every digit is surrounded by "_". Repeated, leading and
   * trailing underscores are removed at the end.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = self::normalize_whitespace($str);
    $str = \str_replace('-', '_', $str);

    $str = (string)\preg_replace_callback(
        // NOTE: no "|" inside the character class, it would be matched as a literal pipe
        '/([\dA-Z])/u',
        function ($matches) use ($encoding) {
          $match = $matches[1];

          // a match that survives the int round-trip is a plain digit
          if ((string)(int)$match === $match) {
            return '_' . $match . '_';
          }

          return '_' . UTF8::strtolower($match, $encoding);
        },
        $str
    );

    $str = (string)\preg_replace(
        [
            '/\s+/', // convert spaces to "_"
            '/_+/',  // remove double "_"
        ],
        [
            '_',
            '_',
        ],
        $str
    );

    $str = self::trim($str, '_'); // trim leading & trailing "_"
    $str = self::trim($str); // trim leading & trailing whitespace

    return $str;
  }
6730
6731
  /**
   * Sort all characters of a string by their code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicated values
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if ($desc === false) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
6756
6757
  /**
   * Alias for "UTF8::split()": forwards both arguments unchanged.
   *
   * @see UTF8::split()
   *
   * @param string|string[] $str
   * @param int             $len
   *
   * @return string[]
   */
  public static function str_split($str, int $len = 1): array
  {
    $chunks = self::split($str, $len);

    return $chunks;
  }
6771
6772
  /**
   * Splits the string at every occurrence of $pattern and returns the parts
   * as an array of strings. An optional integer $limit will truncate the
   * results.
   *
   * INFO: the pattern is passed through "preg_quote()", so it is matched literally.
   *
   * @param string $str
   * @param string $pattern <p>The delimiter with which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] An array of strings.
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // an empty pattern cannot split anything
    if ($pattern === '') {
      return [$str];
    }

    // preg_split() puts the unsplit remainder into the last element when a limit
    // is given, so ask for one extra element and drop it again afterwards
    $splitLimit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $splitLimit);
    if ($parts === false) {
      return [];
    }

    if ($splitLimit > 0 && \count($parts) === $splitLimit) {
      \array_pop($parts);
    }

    return $parts;
  }
6815
6816
  /**
   * Check if the string starts with the given substring.
   *
   * INFO: an empty haystack or an empty needle always results in "false".
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // compare only the leading bytes instead of searching the whole haystack
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6836
6837
  /**
   * Returns true if the string begins with any of $substrings, false otherwise.
   *
   * - case-sensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // nothing to compare against
    if ('' === $str || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate)) {
        return true;
      }
    }

    return false;
  }
6865
6866
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * INFO: an empty string is returned if the input or the separator is empty,
   *       or if the separator was not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // skip the separator itself
    $startAt = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $startAt, null, $encoding);
  }
6897
6898
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * INFO: an empty string is returned if the input or the separator is empty,
   *       or if the separator was not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // skip the separator itself
    $startAt = $separatorPos + self::strlen($separator, $encoding);

    return (string)self::substr($str, $startAt, null, $encoding);
  }
6929
6930
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * INFO: an empty string is returned if the input or the separator is empty,
   *       or if the separator was not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // everything up to (but not including) the separator
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6961
6962
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * INFO: an empty string is returned if the input or the separator is empty,
   *       or if the separator was not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
      return '';
    }

    // everything up to (but not including) the separator
    return (string)self::substr($str, 0, $separatorPos, $encoding);
  }
6993
6994
  /**
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
   *
   * INFO: an empty string is returned if the input or the needle is empty,
   *       or if the needle was not found.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $part = self::strstr($str, $needle, $beforeNeedle, $encoding);

    // "false" means the needle was not found
    return false === $part ? '' : $part;
  }
7026
7027
  /**
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
   *
   * INFO: an empty string is returned if the input or the needle is empty,
   *       or if the needle was not found.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str || '' === $needle) {
      return '';
    }

    $part = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    // "false" means the needle was not found
    return false === $part ? '' : $part;
  }
7054
7055
  /**
   * Wraps $str in the given substring.
   *
   * @param string $str
   * @param string $substring <p>The substring to add to both sides.</p>
   *
   * @return string String with the substring both prepended and appended.
   */
  public static function str_surround(string $str, string $substring): string
  {
    return $substring . $str . $substring;
  }
7067
7068
  /**
   * Returns a string with the first letter of each word capitalized and the
   * rest of the word lower-cased. The input is trimmed first unless
   * $useTrimFirst is disabled. Also accepts an array, $ignore, allowing you
   * to list words not to be capitalized.
   *
   * @param string              $str
   * @param string[]|array|null $ignore                [optional] <p>An array of words not to capitalize or null.
   *                                                   Default: null</p>
   * @param string              $encoding              [optional] <p>Default: UTF-8</p>
   * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
   *                                                   tr</p>
   * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
   *                                                   ß</p>
   * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize(
      string $str,
      array $ignore = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false,
      string $lang = null,
      bool $tryToKeepStringLength = false,
      bool $useTrimFirst = true
  ): string
  {
    if ($useTrimFirst === true) {
      $str = self::trim($str);
    }

    $words = self::str_to_words($str);

    foreach ($words as $index => $word) {
      // ignored words are kept exactly as they are
      if ($ignore && \in_array($word, $ignore, true)) {
        continue;
      }

      $lowered = self::strtolower(
          $word,
          $encoding,
          $cleanUtf8,
          $lang,
          $tryToKeepStringLength
      );

      $words[$index] = self::str_upper_first(
          $lowered,
          $encoding,
          $cleanUtf8,
          $lang,
          $tryToKeepStringLength
      );
    }

    return \implode('', $words);
  }
7125
7126
  /**
   * Returns a trimmed string in proper title case.
   *
   * Also accepts an array, $ignore, allowing you to list words not to be
   * capitalized.
   *
   * Adapted from John Gruber's script.
   *
   * INFO: the entries of $ignore are merged into the small-words regex as they are (no quoting).
   *
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
   *
   * @param string $str
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The titleized string.
   */
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // an all-caps input is lower-cased first, so that the rules below can apply;
    // the encoding is forwarded like in every other case conversion of this method
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $str = $matches[1];
          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $str .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $str .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $str .= static::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $str .= $matches[5];
          }
          // Preserve trailing underscore
          $str .= $matches[6];

          return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind below tests for "…" although its inline comment talks about a hyphen - confirm.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
7270
7271
  /**
   * Get a binary representation of a specific string.
   *
   * Every byte of the input is converted into its 8 bits; leading zero bits of the
   * whole result are stripped. Working byte by byte avoids the precision loss
   * "base_convert()" has on large numbers, so inputs of any length are converted exactly.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The bits as a string of "0" and "1"; "0" for an empty or all-zero input.
   */
  public static function str_to_binary(string $str): string
  {
    $bits = '';
    $byteCount = \strlen($str);

    for ($i = 0; $i < $byteCount; ++$i) {
      $bits .= \sprintf('%08b', \ord($str[$i]));
    }

    // a number has no leading zeros; an empty result is the number zero
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
7284
7285
  /**
   * Convert a string into an array of lines.
   *
   * The string is split at "\r\n", "\r" and "\n". Every line break is handled on its own,
   * so blank lines are kept as empty strings unless $removeEmptyValues is set.
   *
   * @param string   $str
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    if ('' === $str) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    // "\r\n" must come first, so that it counts as one line break. The pattern is
    // ASCII-only, so no "u" modifier is needed and invalid UTF-8 cannot make the split fail.
    $return = \preg_split('/\r\n|\r|\n/', $str);

    if ($return === false) {
      return ($removeEmptyValues === true ? [] : ['']);
    }

    if (
        $removeShortValues === null
        &&
        $removeEmptyValues === false
    ) {
      return $return;
    }

    $tmpReturn = self::reduce_string_array(
        $return,
        $removeEmptyValues,
        $removeShortValues
    );

    return $tmpReturn;
  }
7320
7321
  /**
   * Convert a string into an array of words.
   *
   * INFO: the split keeps the delimiters, so the text between the words
   *       is part of the result as well.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
   *
   * @return string[]
   */
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // result for an empty input or a failed split
    $emptyResult = ($removeEmptyValues === true ? [] : ['']);

    if ('' === $str) {
      return $emptyResult;
    }

    $wordClass = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$wordClass}+(?:[\p{Pd}’']{$wordClass}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      return $emptyResult;
    }

    // no filtering requested
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $reduced = self::reduce_string_array(
        $parts,
        $removeEmptyValues,
        $removeShortValues
    );

    foreach ($reduced as $key => $value) {
      $reduced[$key] = (string)$value;
    }

    return $reduced;
  }
7365
7366
  /**
   * Alias for "UTF8::to_ascii()": forwards all arguments unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
7381
7382
  /**
   * Truncates the string to a given length. If $substring is provided, and
   * truncating occurs, the string is further truncated so that the substring
   * may be appended without exceeding the desired length.
   *
   * INFO: if the substring itself is longer than the desired length, it cannot fit,
   *       so the string is only cut to $length and the substring is not appended.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // short enough: nothing to truncate
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    $substringLength = (int)self::strlen($substring, $encoding);

    // Subtracting a longer substring would turn the length negative; substr() would then
    // cut from the end of the string and the result would exceed the desired length.
    if ($length >= 0 && $substringLength > $length) {
      return (string)self::substr($str, 0, $length, $encoding);
    }

    // Need to further trim the string so we can append the substring
    $truncated = self::substr($str, 0, $length - $substringLength, $encoding);

    return (string)$truncated . $substring;
  }
7415
7416
  /**
   * Truncates the string to a given length, while ensuring that it does not
   * split words. If $substring is provided, and truncating occurs, the
   * string is further truncated so that the substring may be appended without
   * exceeding the desired length.
   *
   * If $substring is as long as (or longer than) $length, nothing of $str can
   * be kept and only $substring is returned.
   *
   * @param string $str
   * @param int    $length    <p>Desired length of the truncated string.</p>
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String after truncating.
   */
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // the string already fits, nothing to truncate
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    // need to further trim the string so we can append the substring
    $length -= (int)self::strlen($substring, $encoding);

    // A zero / negative length would be interpreted by substr() as "cut from
    // the end" and would hand a negative offset to strpos() below.
    if ($length <= 0) {
      return $substring;
    }

    $truncated = self::substr($str, 0, $length, $encoding);
    if ($truncated === false) {
      return '';
    }

    // if the last word was truncated
    // (the first space at/after the last kept character is not directly behind the cut)
    $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
    if ($strPosSpace !== $length) {
      // find pos of the last occurrence of a space, get up to that
      $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

      if ($lastPos !== false || $strPosSpace !== false) {
        // substr() may return false, so cast before concatenating
        $truncated = (string)self::substr($truncated, 0, (int)$lastPos, $encoding);
      }
    }

    return $truncated . $substring;
  }
7459
7460
  /**
   * Returns a lowercase and trimmed string separated by underscores.
   * Underscores are inserted before uppercase characters (with the exception
   * of the first character of the string), and in place of spaces as well as
   * dashes.
   *
   * INFO: this is UTF8::str_delimit() with "_" as delimiter
   *
   * @param string $str
   *
   * @return string The underscored string.
   */
  public static function str_underscored(string $str): string
  {
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
7474
7475
  /**
   * Returns an UpperCamelCase version of the supplied string. It trims
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
   * and underscores, and removes spaces, dashes, underscores.
   *
   * INFO: the string is first passed through UTF8::str_camelize() and the
   * result is then passed through UTF8::str_upper_first()
   *
   * @param string      $str                   <p>The input string.</p>
   * @param string      $encoding              [optional] <p>Default: UTF-8</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String in UpperCamelCase.
   */
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first(
        $camelized,
        $encoding,
        $cleanUtf8,
        $lang,
        $tryToKeepStringLength
    );
  }
7492
7493
  /**
   * alias for "UTF8::ucfirst()"
   *
   * @see UTF8::ucfirst()
   *
   * @param string      $str
   * @param string      $encoding
   * @param bool        $cleanUtf8
   * @param string|null $lang
   * @param bool        $tryToKeepStringLength
   *
   * @return string
   */
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // pure alias: all arguments are forwarded unchanged
    $result = self::ucfirst(
        $str,
        $encoding,
        $cleanUtf8,
        $lang,
        $tryToKeepStringLength
    );

    return $result;
  }
7511
7512
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split via UTF8::str_to_words(); the words are read from
   * the odd indexes of that result, the even indexes are used only to
   * advance the character offset (format 2).
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return string[]|int The number of words in the string
   */
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $total = \count($parts);

    // every format other than 1 or 2 only counts the words
    if ($format !== 1 && $format !== 2) {
      return (int)(($total - 1) / 2);
    }

    $words = [];

    if ($format === 1) {
      // plain list of words
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    // format 2: key every word by its character offset in the string
    $offset = self::strlen($parts[0]);
    for ($idx = 1; $idx < $total; $idx += 2) {
      $words[$offset] = $parts[$idx];
      $offset += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
    }

    return $words;
  }
7555
7556
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *       (both strings are case-folded via UTF8::strtocasefold() first)
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
  }
7577
7578
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $result = self::strstr(
        $haystack,
        $needle,
        $before_needle,
        $encoding,
        $cleanUtf8
    );

    return $result;
  }
7595
7596
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings short-circuit to 0; otherwise both strings are
   * normalized to NFD and compared with the native strcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return \strcmp($normalized1, $normalized2);
  }
7615
7616
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string   $str      <p>The input string.</p>
   * @param string   $charList <p>The chars that end the initial segment.</p>
   * @param int      $offset   [optional] <p>Start of the part of $str that is examined.</p>
   * @param int|null $length   [optional] <p>Length of the part of $str that is examined.</p>
   *
   * @return int|null
   *                  The length (in chars) of the initial segment,<br>
   *                  or <strong>null</strong> if $charList is empty, the examined string is empty
   *                  or a length could not be determined.
   */
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    if ('' === $charList) {
      return null;
    }

    // only look at the requested part of the string
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        return null;
      }
      $str = $strTmp;
    }

    if ('' === $str) {
      return null;
    }

    // Use a dedicated variable for the regex matches instead of overwriting
    // the int parameter $length with an array.
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      // everything in front of the first char from $charList
      $segmentLength = self::strlen($matches[1]);
    } else {
      // no char from $charList found: the whole string is the segment
      $segmentLength = self::strlen($str);
    }

    // strlen() can fail with false, keep the documented "int|null" contract
    return $segmentLength === false ? null : $segmentLength;
  }
7650
7651
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   * @param string $encoding
   * @param bool   $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $result = self::stristr(
        $haystack,
        $needle,
        $before_needle,
        $encoding,
        $cleanUtf8
    );

    return $result;
  }
7668
7669
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every element is converted via UTF8::chr() and the results are joined
   * without a separator.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string UTF-8 encoded string.
   */
  public static function string(array $array): string
  {
    $toChar = [self::class, 'chr'];
    $chars = \array_map($toChar, $array);

    return \implode('', $chars);
  }
7691
7692
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The known BOMs are the keys of self::$BOM.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *              <strong>true</strong> if the string has BOM at the start,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function string_has_bom(string $str): bool
  {
    // only the keys (the BOM byte sequences) are needed here
    foreach (\array_keys(self::$BOM) as $bom) {
      if (\strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
7711
7712
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>
   *                                The input string.
   *                                </p>
   * @param string $allowable_tags  [optional] <p>
   *                                You can use the optional second parameter to specify tags which should
   *                                not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The stripped string.
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // optionally clean the input before the native function sees it
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
7744
7745
  /**
   * Strip all whitespace characters. This includes tabs and newline
   * characters, as well as multibyte whitespace such as the thin space
   * and ideographic space.
   *
   * @param string $str
   *
   * @return string
   */
  public static function strip_whitespace(string $str): string
  {
    // an empty string never needs the regex engine
    return $str === ''
        ? ''
        : (string)\preg_replace('/[[:space:]]+/u', '', $str);
  }
7762
7763
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * The back ends are tried in this order, and each one only returns when
   * it found the needle: mbstring, intl (UTF-8 and non-negative offset only),
   * native stripos() (ASCII-only input), and finally a case-folded UTF8::strpos().
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string $needle    <p>The string to find in haystack.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
   *                   haystack string,<br> or <strong>false</strong> if needle is not found.
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // nothing can be found in / with an empty string
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // the two most common encodings do not need to be normalized
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // 1) mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 2) intl
    //
    // INFO: "grapheme_stripos()" can't handle other encodings
    //       and can't handle a negative offset
    //

    $canUseIntl = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
    if ($canUseIntl) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 3) fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    //
    // 4) fallback via vanilla php
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
  }
7836
7837
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * The search is case insensitive. The back ends are tried in this order:
   * mbstring, intl (UTF-8 only), native stristr() (ASCII-only input) and
   * finally a case-insensitive regex.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The cleaning above may have emptied the needle. Compare against ''
    // explicitly: a loose "!$needle" check would also swallow the valid
    // needle "0" and return the whole haystack.
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // $match[1] captures everything in front of the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // strlen() may return false, but substr() expects an int offset
    return self::substr($haystack, (int)self::strlen($match[1]));
  }
7915
7916
  /**
   * Get the string length, not the byte-length!
   *
   * The back ends are tried in this order: byte length (CP850 / ASCII
   * encoding), mbstring, iconv, intl (UTF-8 only), native strlen()
   * (ASCII-only input) and finally a regex based count. A back end that
   * reports a failure is skipped and the next one is tried.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $str) {
      return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
      // "mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strlen($str, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
      $returnTmp = \iconv_strlen($str, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strlen($str);
      // "grapheme_strlen()" signals a failure with null OR false,
      // in both cases the remaining fallbacks should still get a chance
      if ($returnTmp !== null && $returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    //
    // fallback via vanilla php
    //

    // count the chars by matching every single one of them
    \preg_match_all('/./us', $str, $parts);

    $returnTmp = \count($parts[0]);
    if ($returnTmp === 0 && isset($str[0])) {
      // no char matched although the string is not empty
      return false;
    }

    return $returnTmp;
  }
8031
8032
  /**
   * Get string length in byte.
   *
   * @param string $str
   *
   * @return int
   */
  public static function strlen_in_byte(string $str): int
  {
    if ('' === $str) {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    // ... otherwise the native function is used
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850')
        : \strlen($str);
  }
8056
8057
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *       (both strings are case-folded via UTF8::strtocasefold() first)
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded1, $folded2);
  }
8078
8079
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * Identical strings short-circuit to 0; otherwise both strings are passed
   * through UTF8::strtonatfold() and compared with the native strnatcmp().
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    if ($str1 === $str2) {
      return 0;
    }

    $natFolded1 = self::strtonatfold($str1);
    $natFolded2 = self::strtonatfold($str2);

    return \strnatcmp($natFolded1, $natFolded2);
  }
8098
8099
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * INFO: both strings are case-folded via UTF8::strtocasefold() and then
   *       compared via UTF8::strncmp()
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param int    $len      <p>The length of strings to be used in the comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($folded1, $folded2, $len);
  }
8122
8123
  /**
   * String comparison of the first n characters.
   *
   * INFO: both strings are cut via UTF8::substr() and then compared via UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // a failed substr() (false) is compared as an empty string
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
8144
8145
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found.
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // find the first char of the haystack that is part of the char-list
    $found = \preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m);
    if (!$found) {
      return false;
    }

    // cut the haystack at the position of the matched char
    $start = (int)\strpos($haystack, $m[0]);

    return \substr($haystack, $start);
  }
8167
8168
  /**
8169
   * Find position of first occurrence of string in a string.
8170
   *
8171
   * @link http://php.net/manual/en/function.mb-strpos.php
8172
   *
8173
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
8174
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
8175
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
8176
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8177
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8178
   *
8179
   * @return int|false
8180
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
8181
   *                   string.<br> If needle is not found it returns false.
8182
   */
8183 142
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle,
    // so a non-negative integer is treated as a code point and converted via "self::chr()"
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strpos_in_byte($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    // INFO: a "false" result is not returned directly, the next fallbacks get a chance too
    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // A negative offset counts from the end of the haystack: translate it into the
    // equivalent absolute start position first, so that the position returned below
    // is relative to the whole haystack and not only to the searched tail.
    if ($offset < 0) {
      $offset = \max(0, (int)self::strlen($haystack, $encoding) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    if ($pos) {
      // convert the position found by "strpos()" into a character count and re-add the skipped prefix
      return ($offset + (int)self::strlen(\substr($haystack, 0, $pos), $encoding));
    }

    return $offset;
  }
8318
8319
  /**
   * Byte-wise variant of "strpos()": find the position of the first occurrence of a string in a string.
   *
   * @param string $haystack <p>The string being searched.</p>
   * @param string $needle   <p>The string to find in haystack.</p>
   * @param int    $offset   [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   It returns false if haystack or needle is empty, or if needle is not found.
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no function overloading: the native function can be used directly
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strpos($haystack, $needle, $offset);
    }

    // "mb_" is available if overload is used, so use it with a 8-bit encoding
    return \mb_strpos($haystack, $needle, $offset, 'CP850');
  }
8352
8353
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * INFO: without "mbstring" only the first character of the needle is used for the search.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return the part of haystack before the last occurrence of needle.<br>
   *                              <b>false</b>: return the part of haystack from the last occurrence of needle
   *                              to the end (default).
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to wrong positions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // preferred way: mbstring
    //

    if ($hasMbstring === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    $isByteEncoding = ($encoding === 'CP850' || $encoding === 'ASCII');
    if ($isByteEncoding && $before_needle === false) {
      return \strrchr($haystack, $needle);
    }

    //
    // fallback via iconv || vanilla php: both only search for the first character of the needle
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    if (self::$SUPPORT['iconv'] === true) {
      $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
      $lastPos = self::strrpos($haystack, $needle, null, $encoding);
    }

    if ($lastPos === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
  }
8466
8467
  /**
   * Reverses characters order in the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with characters in the reverse sequence.
   */
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $result = '';

    // walk from the last character to the first one and collect them one by one,
    // a "false" length is handled like an empty string
    for ($index = (int)self::strlen($str) - 1; $index >= 0; --$index) {
      $result .= self::substr($str, $index, 1);
    }

    return $result;
  }
8488
8489
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * INFO: without "mbstring" only the first character of the needle is used for the search.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               <b>true</b>: return the part of haystack before the last occurrence of needle.<br>
   *                               <b>false</b>: return the part of haystack from the last occurrence of needle
   *                               to the end (default).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found.
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to wrong positions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // preferred way: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via vanilla php: search only for the first character of the needle
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    $lastPos = self::strripos($haystack, $needle, 0, $encoding);
    if ($lastPos === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
  }
8559
8560
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is converted via "UTF8::chr()".</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // binary || ascii only: a byte-wise search is sufficient
    //

    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // preferred way: mbstring
    //

    if ($hasMbstring === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback via intl
    //

    // INFO: "grapheme_strripos()" can't handle other encodings and can't handle negative offset
    if (
        self::$SUPPORT['intl'] === true
        &&
        $encoding === 'UTF-8'
        &&
        $offset >= 0
    ) {
      $intlResult = \grapheme_strripos($haystack, $needle, $offset);
      if ($intlResult !== false) {
        return $intlResult;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: case-fold both strings and do a case-sensitive search
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

    return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
  }
8665
8666
  /**
   * Byte-wise variant of "strripos()": finds position of last occurrence of a string within another,
   * case insensitive.
   *
   * @param string $haystack <p>The string from which to get the position of the last occurrence of needle.</p>
   * @param string $needle   <p>The string to find in haystack.</p>
   * @param int    $offset   [optional] <p>The position in haystack to start searching.</p>
   *
   * @return int|false Return the numeric position of the last occurrence of needle in the haystack string.<br>
   *                   It returns false if haystack or needle is empty, or if needle is not found.
   */
  public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no function overloading: the native function can be used directly
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strripos($haystack, $needle, $offset);
    }

    // "mb_" is available if overload is used, so use it with a 8-bit encoding
    return \mb_strripos($haystack, $needle, $offset, 'CP850');
  }
8701
8702
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string. <strong>null</strong> (default) is handled like 0 by the
   *                              byte-wise and the "mbstring" code path.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found, it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      // "strrpos_in_byte()" only accepts an integer offset, so the default (null) must be converted
      return self::strrpos_in_byte($haystack, $needle, (int)$offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      // "mb_strrpos()" expects an integer offset, so the default (null) must be converted
      return \mb_strrpos($haystack, $needle, (int)$offset, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    $haystackTmp = null;
    if ($offset > 0) {
      // skip the first "$offset" characters, the skipped length is re-added to the result below
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // cut the last characters off, so positions are still relative to the start of the haystack
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = self::strrpos_in_byte($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character count
    return (int)$offset + (int)self::strlen(self::substr_in_byte($haystack, 0, $pos));
  }
8837
8838
  /**
   * Byte-wise variant of "strrpos()": find position of last occurrence of a string in a string.
   *
   * @param string $haystack <p>The string being checked, for the last occurrence of needle.</p>
   * @param string $needle   <p>The string to find in haystack.</p>
   * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                         into the string. Negative values will stop searching at an arbitrary point
   *                         prior to the end of the string.</p>
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack string.<br>
   *                   It returns false if haystack or needle is empty, or if needle is not found.
   */
  public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no function overloading: the native function can be used directly
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strrpos($haystack, $needle, $offset);
    }

    // "mb_" is available if overload is used, so use it with a 8-bit encoding
    return \mb_strrpos($haystack, $needle, $offset, 'CP850');
  }
8872
8873
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>Start position of the examined part of the string.</p>
   * @param int|null $length [optional] <p>Length of the examined part of the string.</p>
   *
   * @return int The length of the matching initial segment, 0 if nothing matches or if the string / mask is empty.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    if ($offset || $length !== null) {
      // only examine the requested part of the string
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // use a separate variable for the matches, instead of overwriting the input string
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    // "self::strlen()" can return false, but this method must always return an integer
    return (int)self::strlen($matches[0]);
  }
8900
8901
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // capture everything in front of the first occurrence of the needle (non-greedy)
    $match = [];
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // "self::strlen()" can return false, but "self::substr()" needs an integer offset
    return self::substr($haystack, (int)self::strlen($match[1]));
  }
9004
9005
  /**
   * Byte-wise variant of "strstr()": finds first occurrence of a string within another.
   *
   * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return the part of haystack before the first occurrence of needle.<br>
   *                              <b>false</b>: return the part of haystack from the first occurrence of needle
   *                              to the end (default).
   *                              </p>
   *
   * @return string|false The portion of haystack.<br>
   *                      It returns false if haystack or needle is empty, or if needle is not found.
   */
  public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no function overloading: the native function can be used directly
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strstr($haystack, $needle, $before_needle);
    }

    // "mb_" is available if overload is used, so use it with a 8-bit encoding
    return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
  }
9044
9045
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is first passed through "UTF8::fixStrCaseHelper()" and then
   * lower-cased or upper-cased, depending on $lower.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string      $str       <p>The input string.</p>
   * @param bool        $full      [optional] <p>
   *                               <b>true</b>, replace full case folding chars (default)<br>
   *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                               </p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $lower     [optional] <p>
   *                               <b>true</b>, fold via "UTF8::strtolower()" (default)<br>
   *                               any other value, fold via "UTF8::strtoupper()"
   *                               (PS: uppercase is for some languages better ...)
   *                               </p>
   *
   * @return string The folded string, or an empty string for empty input.
   */
  public static function strtocasefold(
      string $str,
      bool $full = true,
      bool $cleanUtf8 = false,
      string $encoding = 'UTF-8',
      string $lang = null,
      $lower = true
  ): string
  {
    if ($str === '') {
      return '';
    }

    $prepared = self::fixStrCaseHelper($str, $lower, $full);

    // everything except a strict "true" ends up in the uppercase variant
    if ($lower !== true) {
      return self::strtoupper($prepared, $encoding, $cleanUtf8, $lang);
    }

    return self::strtolower($prepared, $encoding, $cleanUtf8, $lang);
  }
9084
9085
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr
   *                                           (needs "intl", otherwise a warning is triggered and the parameter
   *                                           is ignored)</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean the input first, so that the case-mapping functions below
      // do not have to deal with invalid byte sequences
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Lower';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // "transliterator_transliterate()" failed (it returns false on error),
        // so we fall through to the generic "mb_strtolower()" below instead of
        // returning a non-string from this method

      } else {
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtolower($str, $encoding);
  }
9147
9148
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of characters matching
   * "\p{Mn}" (non-spacing marks) is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The folded string; if the normalization fails the input is returned unchanged,
   *                if only the mark-removal fails the normalized string is returned.
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $normalized = \Normalizer::normalize($str, \Normalizer::NFD);
    if (\is_string($normalized) === false) {
      // "Normalizer::normalize()" reports errors via a non-string return value,
      // which must not be passed on to "preg_replace()" or returned as string
      return $str;
    }

    // "preg_replace()" returns null on error (e.g. malformed UTF-8 with the "u" modifier)
    return \preg_replace('/\p{Mn}+/u', '', $normalized) ?? $normalized;
  }
9160
9161
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr
   *                                           (needs "intl", otherwise a warning is triggered and the parameter
   *                                           is ignored)</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // clean the input first, so that the case-mapping functions below
      // do not have to deal with invalid byte sequences
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the requested transliterator id is unknown
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate($langCode, $str);
        if ($transliterated !== false) {
          return $transliterated;
        }

        // "transliterator_transliterate()" failed (it returns false on error),
        // so we fall through to the generic "mb_strtoupper()" below instead of
        // returning a non-string from this method

      } else {
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
9223
9224
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   [optional] <p>The string being translated to. If it is omitted and $from is
   *                              a string, every occurrence of $from is removed; if it is omitted and $from
   *                              is not a string, $from is handed to "\strtr()" as replace-pairs.</p>
   *
   * @return string
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // nothing would change
    if ($to === $from) {
      return $str;
    }

    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // only the leading chars that exist on both sides can be paired
      $fromCount = \count($fromChars);
      $toCount = \count($toChars);
      if ($fromCount !== $toCount) {
        $pairCount = \min($fromCount, $toCount);
        $fromChars = \array_slice($fromChars, 0, $pairCount);
        $toChars = \array_slice($toChars, 0, $pairCount);
      }

      $from = \array_combine($fromChars, $toChars);
    }

    // a plain string without a target means: remove it, everything else is a replace-pairs map
    return \is_string($from)
        ? \str_replace($from, '', $str)
        : \strtr($str, $from);
  }
9268
9269
  /**
   * Return the width of a string.
   *
   * Uses "mb_strwidth()" if mbstring is available; otherwise every char from the
   * hard-coded wide-char ranges below counts as 2 and every other char as 1.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int The width, 0 for an empty string.
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ($str === '') {
      return 0;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    //
    // preferred way: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strwidth($str, $encoding);
    }

    //
    // fallback via vanilla php
    //

    if ($encoding !== 'UTF-8') {
      $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // strip the wide chars and let preg_replace() count how many were removed
    $wideCharCount = 0;
    $narrowChars = (string)preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCharCount);

    // wide chars count twice, the remaining chars once
    return ($wideCharCount << 1) + self::strlen($narrowChars, 'UTF-8');
  }
9319
9320
  /**
   * Get part of a string.
   *
   * Tries, in this order: byte-wise handling for "CP850" / "ASCII", mbstring, intl ("grapheme_substr()"),
   * iconv, the native "substr()" for ASCII-only input and finally a pure PHP implementation.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string   $str       <p>The string being checked.</p>
   * @param int      $offset    <p>The first position used in str.</p>
   * @param int|null $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.<br>
   *                      An empty string is returned for an empty input, for a length of 0 and
   *                      (in the fallback path) for an offset at or behind the end of the string.<br>
   *                      <b>FALSE</b> is returned if the length of the string can't be determined
   *                      in the fallback path.
   */
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $str) {
      return '';
    }

    // Empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if (!$offset && $length === null) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::substr_in_byte($str, $offset, $length);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
      if ($return !== false) {
        return $return;
      }
    }

    // otherwise we need the string-length and can't fake it via "2147483647"
    $str_length = 0;
    if ($offset || $length === null) {
      $str_length = self::strlen($str, $encoding);
    }

    // e.g.: invalid chars + mbstring not installed
    if ($str_length === false) {
      return false;
    }

    // Empty string
    //
    // INFO: "$length" is either null or a non-zero integer at this point
    //       (0 was already handled above), so we can compare strictly
    if ($offset === $str_length && $length === null) {
      return '';
    }

    // Impossible
    if ($offset && $offset > $str_length) {
      // "false" is the php native return type here,
      //  but we optimized this for performance ... see "2147483647" instead of "strlen"
      return '';
    }

    if ($length === null) {
      $length = (int)$str_length;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_substr() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_substr($str, $offset, $length);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      $returnTmp = \iconv_substr($str, $offset, $length);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($str)) {
      return \substr($str, $offset, $length);
    }

    //
    // fallback via vanilla php
    //

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
  }
9477
9478
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, $str1 is reduced to the requested part first
   * and $str2 is cut to the (character-)length of that part before both are compared.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                     the length of the str compared to the length of main_str less the offset.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if (
        $offset !== 0
        ||
        $length !== null
    ) {
      $str1Tmp = self::substr($str1, $offset, $length);
      if ($str1Tmp === false) {
        $str1Tmp = '';
      }
      $str1 = (string)$str1Tmp;

      // "self::strlen()" can return false, but "self::substr()" only accepts int|null
      // as length, so an unknown length is handled like a length of 0 (=> empty string)
      $str1Length = (int)self::strlen($str1);

      $str2Tmp = self::substr($str2, 0, $str1Length);
      if ($str2Tmp === false) {
        $str2Tmp = '';
      }
      $str2 = (string)$str2Tmp;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
9521
9522
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring. If it is omitted, the length of the haystack (measured
   *                             in the given encoding) is used.
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false This functions returns an integer or false if there isn't a string
   *                   (e.g. if haystack or needle is an empty string).
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure the haystack in the same encoding that is used for "self::substr()" below,
        // otherwise the length is wrong (or false) for non UTF-8 input
        $lengthTmp = self::strlen($haystack, $encoding);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via vanilla php: count the matches of the quoted needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
9615
9616
  /**
   * Count the number of substring occurrences, processed in bytes.
   *
   * @param string   $haystack <p>
   *                           The string being checked.
   *                           </p>
   * @param string   $needle   <p>
   *                           The string being found.
   *                           </p>
   * @param int      $offset   [optional] <p>
   *                           The offset where to start counting
   *                           </p>
   * @param int|null $length   [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If it is omitted, the rest of the haystack is searched.
   *                           </p>
   *
   * @return int|false The number of times the
   *                   needle substring occurs in the
   *                   haystack string (0 if haystack or needle is an empty string).
   */
  public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
  {
    if ($haystack === '' || $needle === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {

      if ($offset || $length !== null) {

        if ($length === null) {
          // we are working on bytes here, so the length must be the byte-length
          // ("CP850" is a 8-BIT charset) and not the number of chars
          $length = (int)\mb_strlen($haystack, 'CP850');
        }

        if (
            (
                $length !== 0
                &&
                $offset !== 0
            )
            &&
            ($length + $offset) <= 0
            &&
            Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
        ) {
          return false;
        }

        $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
        if ($haystackTmp === false) {
          $haystackTmp = '';
        }
        $haystack = (string)$haystackTmp;
      }

      // "mb_" is available if overload is used, so use it ...
      return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    if ($length === null) {
      // do not pass "null" as length: it is not a valid value for the native function
      // before PHP 8 and a length of "0" is rejected before PHP 7.1
      return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
  }
9690
9691
  /**
   * Returns the number of occurrences of $substring in the given string.
   * By default, the comparison is case-sensitive, but can be made insensitive
   * by setting $caseSensitive to false.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
   *                              A non-boolean value is used as $encoding (fallback for an older
   *                              version of this api).</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int The number of occurrences, 0 if $str or $substring is an empty string.
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ($str === '' || $substring === '') {
      return 0;
    }

    // only a fallback to prevent BC in the api ...
    if (\is_bool($caseSensitive) === false) {
      $encoding = (string)$caseSensitive;
    }

    if (!$caseSensitive) {
      // fold both strings the same way, so that the case doesn't matter anymore
      $str = self::strtocasefold($str, true, false, $encoding, null, false);
      $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
9721
9722
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the prefix, or the unchanged haystack
   *                if it doesn't start with the needle.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) === true) {
      // "self::strlen()" can return false, but "self::substr()" only accepts an integer as offset
      $needleLength = (int)self::strlen($needle);

      $haystackTmp = self::substr($haystack, $needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
9750
9751
  /**
   * Get part of a string, processed in bytes.
   *
   * @param string   $str    <p>The string being checked.</p>
   * @param int      $offset <p>The first position used in str.</p>
   * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    // Empty string: nothing to cut from, or nothing requested
    if ($str === '' || $length === 0) {
      return '';
    }

    // Whole string
    if ($offset === 0 && $length === null) {
      return $str;
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // no length means: "up to the end", which is faked via the max. 32-bit integer
    $maxLength = $length ?? 2147483647;

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \substr($str, $offset, $maxLength);
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return \mb_substr($str, $offset, $maxLength, 'CP850');
  }
9790
9791
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the suffix, or the unchanged haystack
   *                if it doesn't end with the needle.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ($haystack === '') {
      return '';
    }

    if ($needle === '') {
      return $haystack;
    }

    // no matching suffix, nothing to remove
    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $withoutSuffix = self::substr($haystack, 0, $keepLength);

    return $withoutSuffix === false ? '' : (string)$withoutSuffix;
  }
9819
9820
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the prefix, or the unchanged haystack
   *                if it doesn't start with the needle.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) === true) {
      // "self::strlen()" can return false, but "self::substr()" only accepts an integer as offset
      $needleLength = (int)self::strlen($needle);

      $haystackTmp = self::substr($haystack, $needleLength);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    return $haystack;
  }
9848
9849
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given (and $str is a single string), the
   *                                          replacing ends at the end of string. If length is zero then this
   *                                          function will have the effect of inserting replacement into string at
   *                                          the given start offset.<br><br>
   *                                          Note: when $str is an array and $length is null, a length of 0
   *                                          (= insert) is used for every element.</p>
   * @param string          $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // the replacement: one entry per input string
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset: non-integer entries fall back to 0
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length: null entries become 0, other non-integer entries become $num
      if (null === $length) {
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call, element by element; the requested encoding is passed along
      // (a plain callable would silently fall back to the default encoding)
      return \array_map(
          static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
            return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
          },
          $str,
          $replacement,
          $offset,
          $length
      );
    }

    // a single input string can only take a single replacement: use the first one
    if (\is_array($replacement) === true) {
      if (\count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // pure ASCII input: the native byte-based function is sufficient
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      $string_length = self::strlen($str, $encoding);

      // clamp the offset into [0, $string_length]
      if ($offset < 0) {
        $offset = \max(0, $string_length + $offset);
      } elseif ($offset > $string_length) {
        $offset = $string_length;
      }

      // translate a negative / missing / oversized length into a concrete character count
      if ($length < 0) {
        $length = \max(0, $string_length - $offset + $length);
      } elseif ($length === null || $length > $string_length) {
        $length = $string_length;
      }

      if (($offset + $length) > $string_length) {
        $length = $string_length - $offset;
      }

      // "substr()" can report false, which is treated as an empty part
      $head = (string)self::substr($str, 0, $offset, $encoding);
      $tail = (string)self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);

      return $head . $replacement . $tail;
    }

    // fallback without mbstring: splice on arrays of single characters
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $lengthTmp = self::strlen($str, $encoding);
      if ($lengthTmp === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$lengthTmp;
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
9982
9983
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string The haystack without the trailing $needle; the unchanged haystack if it
   *                does not end with $needle (or if $needle is empty).
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "strlen()" can report false instead of a length; do not do arithmetic on that.
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $kept = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);

    // "substr()" can report false, treat that as "nothing left"
    return $kept === false ? '' : (string)$kept;
  }
10011
10012
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ('' === $str) {
      return '';
    }

    // only unknown encoding names need to be normalized
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the case conversions run
      $str = self::clean($str);
    }

    $lowered = self::strtolower($str, $encoding);
    $uppered = self::strtoupper($str, $encoding);

    // lower ^ upper ^ original: bytes that are equal in two of the operands cancel out,
    // leaving the opposite-case variant of every character.
    // NOTE(review): string XOR works byte by byte - presumably this expects both case
    // conversions to keep the byte length of $str; confirm for chars like "ß".
    return (string)($lowered ^ $uppered ^ $str);
  }
10039
10040
  /**
   * Checks whether "mbstring" or "iconv" functions are provided without the matching
   * PHP extension being loaded (presumably by a polyfill such as "symfony/polyfill").
   *
   * @return bool
   *              <strong>true</strong> if "mb_strlen()" exists while the "mbstring" extension is not loaded,
   *              or "iconv()" exists while the "iconv" extension is not loaded, <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
    $iconvPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

    return $mbstringPolyfilled || $iconvPolyfilled;
  }
10063
10064
  /**
   * Replaces every tab character in the string with a run of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab.</p>
   *
   * @return string The string with all tabs expanded to spaces.
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
10074
10075
  /**
   * Converts the first character of each word in the string to uppercase
   * and all other chars to lowercase.
   *
   * Thin wrapper: normalizes the encoding name and delegates to "UTF8::str_titleize()".
   *
   * @param string      $str                   <p>The input string.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // only unknown encoding names need to be normalized
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return self::str_titleize(
        $str,
        null,
        $encoding,
        $cleanUtf8,
        $lang,
        $tryToKeepStringLength,
        false
    );
  }
10095
10096
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $subst_chr <p>Forwarded as the "unknown character" replacement.</p>
   * @param bool   $strict    <p>Forwarded as the "strict" flag.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
10113
10114
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
10129
10130
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
10145
10146
  /**
   * alias for "UTF8::to_utf8()"
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
10161
10162
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first; every remaining non-ASCII character is decoded to its
   * code point and looked up in a per-"bank" (code point >> 8) replacement table that is
   * loaded lazily via "getData()". Characters without a table entry become $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
  {
    // cache of the already loaded replacement tables, keyed by bank
    static $UTF8_TO_ASCII;

    if ('' === $str) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure:
        // keep the cleaned string and fall through to the table based conversion
        if ($transliterated !== false) {
          $str = (string)$transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // 128..191 is a stray continuation byte, 254/255 never starts a sequence:
      // there is nothing to decode (and no following byte to read)
      if ($ordC0 < 192 || $ordC0 >= 254) {
        $c = $unknown;
        continue;
      }

      // decode the code point of this character; "$ord" is local to the current
      // iteration, so a value from a previous character can never be reused
      $ordC1 = self::$ORD[$c[1]];

      if ($ordC0 <= 223) {
        // 2 bytes
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      } else {
        $ordC2 = self::$ORD[$c[2]];

        if ($ordC0 <= 239) {
          // 3 bytes
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        } else {
          $ordC3 = self::$ORD[$c[3]];

          if ($ordC0 <= 247) {
            // 4 bytes
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          } else {
            $ordC4 = self::$ORD[$c[4]];

            if ($ordC0 <= 251) {
              // 5 bytes
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            } else {
              // 6 bytes (lead byte 252 or 253)
              $ordC5 = self::$ORD[$c[5]];
              $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
            }
          }
        }
      }

      // the upper bits select the replacement table ("bank"), the low byte the entry in it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // no table for this bank: remember that, so it is not requested again
          $UTF8_TO_ASCII[$bank] = [];
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    unset($c);

    return \implode('', $chars);
  }
10323
10324
  /**
   * Interprets a value as boolean.
   *
   * The value is cast to string first. "true", "1", "on", "yes" give true and
   * "false", "0", "off", "no" give false (case-insensitive). Any other numeric
   * string is true if it is greater than zero. Everything else is true if something
   * is left after trimming.
   *
   * Info: http://php.net/manual/en/filter.filters.validate.php
   *
   * @param mixed $str
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return false;
    }

    $normalized = \strtolower($str);

    if (\in_array($normalized, ['true', '1', 'on', 'yes'], true)) {
      return true;
    }

    if (\in_array($normalized, ['false', '0', 'off', 'no'], true)) {
      return false;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($str)) {
      return (float)$str > 0;
    }

    $trimmed = self::trim($str);

    return (bool)$trimmed;
  }
10363
10364
  /**
   * Convert given string to safe filename (and keep string case).
   *
   * Every character except letters (a-z, A-Z), digits, ".", "-", whitespace and the
   * fallback char is removed. Whitespace runs are replaced by the fallback char, runs
   * of the fallback char are collapsed and the fallback char is trimmed from both ends.
   *
   * @param string $string
   * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
   *                                  simply replaced with hyphen.
   * @param string $fallback_char     Replacement for whitespace; may be empty, in which case whitespace is
   *                                  simply removed.
   *
   * @return string
   */
  public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
  {
    if ($use_transliterate === true) {
      $string = self::str_transliterate($string, $fallback_char);
    }

    $fallback_char_escaped = \preg_quote($fallback_char, '/');

    $patterns = [
        '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
        '/[\s]+/',                                            // 2) convert spaces to $fallback_char
    ];
    $replacements = [
        '',
        $fallback_char,
    ];

    // An empty fallback char would produce the invalid pattern "/[]+/": "preg_replace()"
    // then returns null and the whole filename would be lost. There is nothing to
    // collapse or trim in that case anyway.
    if ('' === $fallback_char) {
      return (string)\preg_replace($patterns, $replacements, $string);
    }

    $patterns[] = '/[' . $fallback_char_escaped . ']+/';      // 3) remove double $fallback_char's
    $replacements[] = $fallback_char;

    $string = (string)\preg_replace($patterns, $replacements, $string);

    // trim "$fallback_char" from beginning and end of the string
    return \trim($string, $fallback_char);
  }
10399
10400
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // "array_map()" with a single array keeps the original keys
      return \array_map(
          static function ($value) {
            return self::to_iso8859($value);
          },
          $str
      );
    }

    $input = (string)$str;

    return '' === $input ? '' : self::utf8_decode($input);
  }
10424
10425
  /**
   * alias for "UTF8::to_iso8859()"
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>A string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
10438
10439
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      foreach ($str as $k => $v) {
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ('' === $str) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $max = self::strlen_in_byte($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      // 7-bit byte: it doesn't need conversion
      if ($c1 < "\x80") {
        $buf .= $c1;
        continue;
      }

      // continuation byte (10xxxxxx) without a lead byte: needs conversion
      if ($c1 < "\xC0") {
        $buf .= self::to_utf8_convert_helper($c1);
        continue;
      }

      // lead byte: how many continuation bytes would a valid sequence need?
      if ($c1 <= "\xDF") {
        $trailing = 1; // looks like 2 bytes UTF8
      } elseif ($c1 <= "\xEF") {
        $trailing = 2; // looks like 3 bytes UTF8
      } elseif ($c1 <= "\xF7") {
        $trailing = 3; // looks like 4 bytes UTF8
      } else {
        $trailing = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the continuation bytes; a missing byte (end of string) counts as invalid
      $sequence = $c1;
      $isUtf8Sequence = $trailing > 0;
      for ($j = 1; $isUtf8Sequence && $j <= $trailing; $j++) {
        $next = $i + $j >= $max ? "\x00" : $str[$i + $j];
        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $isUtf8Sequence = false;
        }
      }

      if ($isUtf8Sequence) {
        // yeah, almost sure it's UTF8 already
        $buf .= $sequence;
        $i += $trailing;
      } else {
        // not valid UTF8 - convert only the lead byte, the following bytes are checked on their own
        $buf .= self::to_utf8_convert_helper($c1);
      }
    }

    // decode unicode escape sequences
    $decoded = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        static function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );
    // "preg_replace_callback()" returns null on error: keep the converted buffer then
    if ($decoded !== null) {
      $buf = $decoded;
    }

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
10548
10549
  /**
   * Converts a single byte, that is not part of a valid UTF-8 sequence, into UTF-8.
   *
   * If the "win1252_to_utf8" table has an entry for the byte, that entry is used.
   * Otherwise a two byte sequence is built from the byte value
   * (presumably the ISO-8859-1 interpretation - verify against the data tables).
   *
   * @param int|string $input <p>The byte to convert (used as key of the "ord" table).</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return '' . self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Lead byte: 0xC0 combined with the upper two bits of the byte value.
    // The division is truncated explicitly: a fractional float is not a valid
    // array key (implicit conversion is deprecated since PHP 8.1).
    $cc1 = self::$CHR[(int)($ordC1 / 64)] | "\xC0";

    // continuation byte: 0x80 combined with the lower six bits (bitwise string operators)
    $cc2 = ((string)$input & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
10582
10583
  /**
   * Strip whitespace or other characters from the beginning and the end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function could only be used for pure 7-bit (ASCII) input, but
   * checking for ASCII would cost more time than it saves here.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. When omitted (INF) or empty,
   *                      Unicode separators (\pZ) and "other" characters (\pC) are stripped.</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // The string "0" is falsy in PHP but is a valid character to strip, so it must
    // not be folded into the "no characters given" case like the other empty values.
    $useDefaultCharacterClass = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultCharacterClass) {
      $pattern = "^[\pZ\pC]+|[\pZ\pC]+\$";
    } else {
      // Quote the caller-supplied characters so they are matched literally inside the class.
      $chars = \preg_quote($chars, '/');
      $pattern = "^[$chars]+|[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
10612
10613
  /**
   * Upper-cases the first character of a string and leaves the remainder untouched.
   *
   * @param string      $str                   <p>The input string.</p>
   * @param string      $encoding              [optional] <p>Charset handed to the substring / upper-case helpers.</p>
   * @param bool        $cleanUtf8             [optional] <p>Run the input through "UTF8::clean()" first.</p>
   * @param string|null $lang                  [optional] <p>Language for special cases: az, el, lt, tr</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string The resulting string.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Everything after the first character is appended unchanged.
    $tail = self::substr($str, 1, null, $encoding);
    if ($tail === false) {
      $tail = '';
    }

    // Only the first character is passed through the upper-case conversion.
    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $head . $tail;
  }
10647
10648
  /**
   * Alias of "UTF8::ucfirst()": forwards the three arguments unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Charset passed on to "UTF8::ucfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean-up flag passed on to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
10663
10664
  /**
   * Uppercase the first character of every word in the string.
   *
   * Plain ASCII input without exceptions or an extra charlist is delegated to the
   * native "\ucwords()". Everything else is split into words, each word is passed
   * through "UTF8::ucfirst()" (unless it is listed in $exceptions) and the pieces
   * are glued back together.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare with '' explicitly: the string "0" is falsy but is valid input.
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE doesn't only uppercase the first letter, it also lowercases all other letters

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // The native function is only an option when no extra rules were requested;
    // again an explicit '' comparison, so that a charlist of "0" is honoured.
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // Skip empty pieces only; a word such as "0" must survive in the output.
      if ('' === (string)$word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
10732
10733
  /**
   * Decodes url-encoded input and html entities (repeatedly, if requested) and
   * repairs urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Keep decoding until the string no longer changes.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // "%uXXXX" escapes are rewritten as hexadecimal html entities,
    // so that the entity decoding below can resolve them.
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapePattern, $str)) {
      $str = (string)\preg_replace($unicodeEscapePattern, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    while (true) {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

      // One pass is enough for single-decode mode; otherwise stop once a pass changes nothing.
      if ($multi_decode !== true || $previous === $str) {
        break;
      }
    }

    return $str;
  }
10781
10782
  /**
10783
   * Returns an array that maps "urlencoded" win1252 sequences to UTF-8 characters.
10784
   *
10785
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
10786
   *
10787
   * @return string[]
10788
   */
10789 2
  public static function urldecode_fix_win1252_chars(): array
10790
  {
10791
    return [
10792 2
        '%20' => ' ',
10793
        '%21' => '!',
10794
        '%22' => '"',
10795
        '%23' => '#',
10796
        '%24' => '$',
10797
        '%25' => '%',
10798
        '%26' => '&',
10799
        '%27' => "'",
10800
        '%28' => '(',
10801
        '%29' => ')',
10802
        '%2A' => '*',
10803
        '%2B' => '+',
10804
        '%2C' => ',',
10805
        '%2D' => '-',
10806
        '%2E' => '.',
10807
        '%2F' => '/',
10808
        '%30' => '0',
10809
        '%31' => '1',
10810
        '%32' => '2',
10811
        '%33' => '3',
10812
        '%34' => '4',
10813
        '%35' => '5',
10814
        '%36' => '6',
10815
        '%37' => '7',
10816
        '%38' => '8',
10817
        '%39' => '9',
10818
        '%3A' => ':',
10819
        '%3B' => ';',
10820
        '%3C' => '<',
10821
        '%3D' => '=',
10822
        '%3E' => '>',
10823
        '%3F' => '?',
10824
        '%40' => '@',
10825
        '%41' => 'A',
10826
        '%42' => 'B',
10827
        '%43' => 'C',
10828
        '%44' => 'D',
10829
        '%45' => 'E',
10830
        '%46' => 'F',
10831
        '%47' => 'G',
10832
        '%48' => 'H',
10833
        '%49' => 'I',
10834
        '%4A' => 'J',
10835
        '%4B' => 'K',
10836
        '%4C' => 'L',
10837
        '%4D' => 'M',
10838
        '%4E' => 'N',
10839
        '%4F' => 'O',
10840
        '%50' => 'P',
10841
        '%51' => 'Q',
10842
        '%52' => 'R',
10843
        '%53' => 'S',
10844
        '%54' => 'T',
10845
        '%55' => 'U',
10846
        '%56' => 'V',
10847
        '%57' => 'W',
10848
        '%58' => 'X',
10849
        '%59' => 'Y',
10850
        '%5A' => 'Z',
10851
        '%5B' => '[',
10852
        '%5C' => '\\',
10853
        '%5D' => ']',
10854
        '%5E' => '^',
10855
        '%5F' => '_',
10856
        '%60' => '`',
10857
        '%61' => 'a',
10858
        '%62' => 'b',
10859
        '%63' => 'c',
10860
        '%64' => 'd',
10861
        '%65' => 'e',
10862
        '%66' => 'f',
10863
        '%67' => 'g',
10864
        '%68' => 'h',
10865
        '%69' => 'i',
10866
        '%6A' => 'j',
10867
        '%6B' => 'k',
10868
        '%6C' => 'l',
10869
        '%6D' => 'm',
10870
        '%6E' => 'n',
10871
        '%6F' => 'o',
10872
        '%70' => 'p',
10873
        '%71' => 'q',
10874
        '%72' => 'r',
10875
        '%73' => 's',
10876
        '%74' => 't',
10877
        '%75' => 'u',
10878
        '%76' => 'v',
10879
        '%77' => 'w',
10880
        '%78' => 'x',
10881
        '%79' => 'y',
10882
        '%7A' => 'z',
10883
        '%7B' => '{',
10884
        '%7C' => '|',
10885
        '%7D' => '}',
10886
        '%7E' => '~',
10887
        '%7F' => '',
10888
        '%80' => '`',
10889
        '%81' => '',
10890
        '%82' => '‚',
10891
        '%83' => 'ƒ',
10892
        '%84' => '„',
10893
        '%85' => '…',
10894
        '%86' => '†',
10895
        '%87' => '‡',
10896
        '%88' => 'ˆ',
10897
        '%89' => '‰',
10898
        '%8A' => 'Š',
10899
        '%8B' => '‹',
10900
        '%8C' => 'Œ',
10901
        '%8D' => '',
10902
        '%8E' => 'Ž',
10903
        '%8F' => '',
10904
        '%90' => '',
10905
        '%91' => '‘',
10906
        '%92' => '’',
10907
        '%93' => '“',
10908
        '%94' => '”',
10909
        '%95' => '•',
10910
        '%96' => '–',
10911
        '%97' => '—',
10912
        '%98' => '˜',
10913
        '%99' => '™',
10914
        '%9A' => 'š',
10915
        '%9B' => '›',
10916
        '%9C' => 'œ',
10917
        '%9D' => '',
10918
        '%9E' => 'ž',
10919
        '%9F' => 'Ÿ',
10920
        '%A0' => '',
10921
        '%A1' => '¡',
10922
        '%A2' => '¢',
10923
        '%A3' => '£',
10924
        '%A4' => '¤',
10925
        '%A5' => '¥',
10926
        '%A6' => '¦',
10927
        '%A7' => '§',
10928
        '%A8' => '¨',
10929
        '%A9' => '©',
10930
        '%AA' => 'ª',
10931
        '%AB' => '«',
10932
        '%AC' => '¬',
10933
        '%AD' => '',
10934
        '%AE' => '®',
10935
        '%AF' => '¯',
10936
        '%B0' => '°',
10937
        '%B1' => '±',
10938
        '%B2' => '²',
10939
        '%B3' => '³',
10940
        '%B4' => '´',
10941
        '%B5' => 'µ',
10942
        '%B6' => '¶',
10943
        '%B7' => '·',
10944
        '%B8' => '¸',
10945
        '%B9' => '¹',
10946
        '%BA' => 'º',
10947
        '%BB' => '»',
10948
        '%BC' => '¼',
10949
        '%BD' => '½',
10950
        '%BE' => '¾',
10951
        '%BF' => '¿',
10952
        '%C0' => 'À',
10953
        '%C1' => 'Á',
10954
        '%C2' => 'Â',
10955
        '%C3' => 'Ã',
10956
        '%C4' => 'Ä',
10957
        '%C5' => 'Å',
10958
        '%C6' => 'Æ',
10959
        '%C7' => 'Ç',
10960
        '%C8' => 'È',
10961
        '%C9' => 'É',
10962
        '%CA' => 'Ê',
10963
        '%CB' => 'Ë',
10964
        '%CC' => 'Ì',
10965
        '%CD' => 'Í',
10966
        '%CE' => 'Î',
10967
        '%CF' => 'Ï',
10968
        '%D0' => 'Ð',
10969
        '%D1' => 'Ñ',
10970
        '%D2' => 'Ò',
10971
        '%D3' => 'Ó',
10972
        '%D4' => 'Ô',
10973
        '%D5' => 'Õ',
10974
        '%D6' => 'Ö',
10975
        '%D7' => '×',
10976
        '%D8' => 'Ø',
10977
        '%D9' => 'Ù',
10978
        '%DA' => 'Ú',
10979
        '%DB' => 'Û',
10980
        '%DC' => 'Ü',
10981
        '%DD' => 'Ý',
10982
        '%DE' => 'Þ',
10983
        '%DF' => 'ß',
10984
        '%E0' => 'à',
10985
        '%E1' => 'á',
10986
        '%E2' => 'â',
10987
        '%E3' => 'ã',
10988
        '%E4' => 'ä',
10989
        '%E5' => 'å',
10990
        '%E6' => 'æ',
10991
        '%E7' => 'ç',
10992
        '%E8' => 'è',
10993
        '%E9' => 'é',
10994
        '%EA' => 'ê',
10995
        '%EB' => 'ë',
10996
        '%EC' => 'ì',
10997
        '%ED' => 'í',
10998
        '%EE' => 'î',
10999
        '%EF' => 'ï',
11000
        '%F0' => 'ð',
11001
        '%F1' => 'ñ',
11002
        '%F2' => 'ò',
11003
        '%F3' => 'ó',
11004
        '%F4' => 'ô',
11005
        '%F5' => 'õ',
11006
        '%F6' => 'ö',
11007
        '%F7' => '÷',
11008
        '%F8' => 'ø',
11009
        '%F9' => 'ù',
11010
        '%FA' => 'ú',
11011
        '%FB' => 'û',
11012
        '%FC' => 'ü',
11013
        '%FD' => 'ý',
11014
        '%FE' => 'þ',
11015
        '%FF' => 'ÿ',
11016
    ];
11017
  }
11018
11019
  /**
11020
   * Decodes an UTF-8 string to ISO-8859-1.
11021
   *
11022
   * @param string $str <p>The input string.</p>
11023
   * @param bool   $keepUtf8Chars
11024
   *
11025
   * @return string
11026
   */
11027 13
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
11028
  {
11029 13
    if ('' === $str) {
11030 5
      return '';
11031
    }
11032
11033 13
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
11034 13
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;
11035
11036 13
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
11037
11038 1
      if (self::$WIN1252_TO_UTF8 === null) {
11039
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type array|null. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
11040
      }
11041
11042 1
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

11042
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
11043 1
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

11043
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
11044
    }
11045
11046
    /** @noinspection PhpInternalEntityUsedInspection */
11047 13
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);
11048
11049 13
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
11050
      self::checkForSupport();
11051
    }
11052
11053
    // save for later comparison
11054 13
    $str_backup = $str;
11055 13
    $len = self::strlen_in_byte($str);
11056
11057 13
    if (self::$ORD === null) {
11058
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type array|null. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
11059
    }
11060
11061 13
    if (self::$CHR === null) {
11062
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type array|null. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
11063
    }
11064
11065 13
    $noCharFound = '?';
11066
    /** @noinspection ForeachInvariantsInspection */
11067 13
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
11068 13
      switch ($str[$i] & "\xF0") {
11069 13
        case "\xC0":
11070 12
        case "\xD0":
11071 13
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
11072 13
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
11073 13
          break;
11074
11075
        /** @noinspection PhpMissingBreakStatementInspection */
11076 12
        case "\xF0":
11077
          ++$i;
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment if this fall-through is intended.
Loading history...
11078 12
        case "\xE0":
11079 10
          $str[$j] = $noCharFound;
11080 10
          $i += 2;
11081 10
          break;
11082
11083
        default:
11084 12
          $str[$j] = $str[$i];
11085
      }
11086
    }
11087
11088 13
    $return = self::substr_in_byte($str, 0, $j);
11089 13
    if ($return === false) {
0 ignored issues
show
introduced by
The condition $return === false is always false.
Loading history...
11090
      $return = '';
11091
    }
11092
11093
    if (
11094 13
        $keepUtf8Chars === true
11095
        &&
11096 13
        self::strlen($return) >= self::strlen($str_backup)
11097
    ) {
11098 2
      return $str_backup;
11099
    }
11100
11101 13
    return $return;
11102
  }
11103
11104
  /**
11105
   * Encodes an ISO-8859-1 string to UTF-8.
11106
   *
11107
   * @param string $str <p>The input string.</p>
11108
   *
11109
   * @return string
11110
   */
11111 14
  public static function utf8_encode(string $str): string
11112
  {
11113 14
    if ('' === $str) {
11114 13
      return '';
11115
    }
11116
11117 14
    $str = \utf8_encode($str);
11118
11119
    // the polyfill maybe return false
11120
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
11121 14
    if ($str === false) {
11122
      return '';
11123
    }
11124
11125 14
    if (false === \strpos($str, "\xC2")) {
11126 6
      return $str;
11127
    }
11128
11129 12
    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
11130 12
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;
11131
11132 12
    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {
11133
11134 1
      if (self::$WIN1252_TO_UTF8 === null) {
11135
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type array|null. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
11136
      }
11137
11138 1
      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

11138
      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
11139 1
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

11139
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
11140
    }
11141
11142 12
    return \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $str);
11143
  }
11144
11145
  /**
   * Thin wrapper that hands the string to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
11158
11159
  /**
   * Exposes the class-level table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return string[]
   *                 The whitespace characters as values, keyed by the type of whitespace
   *                 as defined in the URL above.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
11174
11175
  /**
   * Cuts a string down to at most $limit words.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>Maximum number of words to keep.</p>
   * @param string $strAddOn <p>Text appended when the string was shortened.</p>
   *
   * @return string The input itself when it holds no more than $limit words, otherwise the
   *                shortened text followed by $strAddOn; '' for empty input or a limit below 1.
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ('' === $str || $limit < 1) {
      return '';
    }

    // Leading whitespace plus up to $limit runs of "non-whitespace, then whitespace".
    $wordsPattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($wordsPattern, $str, $matches);

    // Only a match that is shorter than the whole input means something was cut off.
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
11206
11207
  /**
   * Wraps a string to a given number of characters.
   *
   * The method builds a single-byte "shadow" string with one byte per character of
   * the input (' ' for a space, '#' for an existing $break, '?' for anything else),
   * lets the native "\wordwrap()" pick the break positions on that shadow string and
   * then replays those positions on the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string The given string wrapped at the specified column; '' when $str or $break is empty.
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    $w = '';      // shadow string: exactly one byte per entry of $chars
    $chars = [];  // the real characters; every existing $break is stored as one entry

    // $break is non-empty at this point, so explode() always yields an array.
    foreach (\explode($break, $str) as $segmentIndex => $segment) {

      // Re-insert the delimiter that explode() removed between two segments.
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $w .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $w .= ' ' === $char ? ' ' : '?';
      }
    }

    $strReturn = '';
    $j = 0;          // read position in $chars
    $b = $i = -1;    // $b: position of the current '#' in $w, $i: position already copied
    $w = \wordwrap($w, $width, '#', $cut);

    while (false !== $b = self::strpos($w, '#', $b + 1)) {
      // Copy all characters that precede this break position.
      for (++$i; $i < $b; ++$i) {
        $strReturn .= $chars[$j];
        unset($chars[$j++]);
      }

      // A '#' that stands for an original break or a replaced space consumes that
      // character; a '#' inserted by "$cut" has no counterpart in $chars.
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
        unset($chars[$j++]);
      }

      $strReturn .= $break;
    }

    // Whatever is left belongs to the last line.
    return $strReturn . \implode('', $chars);
  }
11277
11278
  /**
   * Word-wraps every line of the string separately.
   *
   * The input is split on "\r\n", "\r" and "\n"; each line is wrapped with the
   * UTF-8 aware "UTF8::wordwrap()" and written back followed by "\n" (so the result
   * always ends with a line feed).
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width used for every line.</p>
   *
   * @return string
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $lines = (array)\preg_split('/\\r\\n|\\r|\\n/', $str);

    $wrapped = '';
    foreach ($lines as $line) {
      // preg_split() returns false on failure and the (array) cast turns that into [false].
      if ($line === false) {
        continue;
      }

      // Use the multibyte-aware implementation of this class: the unqualified
      // wordwrap() resolves to PHP's native function, which measures the width in bytes.
      $wrapped .= self::wordwrap($line, $limit);
      $wrapped .= "\n";
    }

    return $wrapped;
  }
11302
11303
  /**
   * Exposes the class-level list of Unicode White Space characters.
   *
   * @return string[] Numeric code point as key, the White Space Character as value.
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
11312
11313
11314
}
11315