Passed
Push — master ( f6be5f...def38e )
by Lars
02:57
created

UTF8   F

Complexity

Total Complexity 1532

Size/Duplication

Total Lines 11019
Duplicated Lines 0 %

Test Coverage

Coverage 83.91%

Importance

Changes 0
Metric Value
eloc 3749
dl 0
loc 11019
ccs 2753
cts 3281
cp 0.8391
rs 0.8
c 0
b 0
f 0
wmc 1532

287 Methods

Rating   Name   Duplication   Size   Complexity  
A chr_to_decimal() 0 30 6
A str_substr_after_first_separator() 0 20 4
A file_has_bom() 0 8 2
A max() 0 14 3
A str_camelize() 0 26 2
A add_bom_to_string() 0 7 2
A parse_str() 0 20 5
A filter_input() 0 9 2
A str_contains() 0 16 6
B str_to_lines() 0 27 7
B substr_in_byte() 0 26 7
A array_change_key_case() 0 22 5
A get_unique_string() 0 15 2
A is_bom() 0 9 3
A is_hexadecimal() 0 3 1
A strnatcasecmp() 0 5 1
A encode_mimeheader() 0 30 5
A substr_left() 0 19 5
A count_chars() 0 3 1
F strlen() 0 99 21
A str_isubstr_last() 0 16 4
A ctype_loaded() 0 3 1
A str_replace_beginning() 0 6 1
A has_uppercase() 0 3 1
A remove_left() 0 13 2
C stripos() 0 57 15
A str_offset_exists() 0 10 2
F strrchr() 0 92 20
A str_iends_with() 0 11 4
A max_chr_width() 0 8 2
A isBinary() 0 3 1
F utf8_decode() 0 75 16
A ltrim() 0 15 4
D is_utf8() 0 146 30
A remove_html() 0 3 1
A str_longest_common_suffix() 0 16 3
C wordwrap() 0 53 13
A ucfirst() 0 20 3
A lcword() 0 3 1
A str_pad_both() 0 5 1
A str_index_last() 0 7 1
A str_substr_last() 0 16 4
A mbstring_loaded() 0 9 3
A str_limit() 0 15 4
F chr() 0 88 21
A html_escape() 0 6 1
A toUTF8() 0 3 1
A string() 0 10 1
C normalize_encoding() 0 124 12
B rxClass() 0 39 8
A apply_padding() 0 19 5
C get_file_type() 0 61 12
A str_ensure_right() 0 7 2
A chr_to_int() 0 3 1
B str_titleize_for_humans() 0 127 5
C is_utf16() 0 61 15
A isHtml() 0 3 1
C filter() 0 53 13
A normalize_whitespace() 0 31 6
A str_starts_with() 0 11 4
A isBase64() 0 3 1
A str_humanize() 0 15 1
A is_html() 0 12 2
A decode_mimeheader() 0 19 6
C substr_count_in_byte() 0 51 15
A html_decode() 0 3 1
A strchr() 0 3 1
A strichr() 0 3 1
A isUtf32() 0 3 1
A str_index_first() 0 7 1
A strlen_in_byte() 0 16 4
A rtrim() 0 15 4
B str_longest_common_substring() 0 40 8
A regex_replace() 0 18 3
A chunk_split() 0 3 1
A titlecase() 0 8 3
A getData() 0 9 2
A str_iindex_first() 0 7 1
B strtolower() 0 48 10
A urldecode() 0 28 4
A str_isubstr_before_first_separator() 0 16 4
A strrev() 0 13 3
A replace_all() 0 7 2
D substr_replace() 0 81 18
A removeBOM() 0 3 1
A strstr_in_byte() 0 16 5
A str_matches_pattern() 0 7 2
A is_alpha() 0 3 1
A str_titleize() 0 17 3
A ws() 0 3 1
A get_random_string() 0 25 4
A str_replace_first() 0 8 2
A fix_utf8() 0 20 4
A toLatin1() 0 3 1
A str_pad_right() 0 3 1
B ucwords() 0 56 11
A first_char() 0 12 3
A to_boolean() 0 33 4
D stristr() 0 63 18
A isUtf8() 0 3 1
A strncasecmp() 0 6 1
B strwidth() 0 40 8
A css_stripe_media_queries() 0 6 1
A trim() 0 15 4
A clean() 0 47 6
A is_serialized() 0 11 3
A str_upper_camelize() 0 3 1
A is_uppercase() 0 3 1
A substr_compare() 0 25 6
F substr_count() 0 74 19
A strnatcmp() 0 3 2
B str_pad() 0 56 11
A str_ireplace() 0 17 3
A to_latin1() 0 3 1
A str_replace_ending() 0 6 1
A string_has_bom() 0 9 3
B strtr() 0 30 7
B str_contains_all() 0 22 7
A is_ascii() 0 7 2
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 20 4
B range() 0 46 11
B strspn() 0 15 7
A strcasecmp() 0 5 1
A str_transliterate() 0 3 1
A rawurldecode() 0 28 4
B str_capitalize_name_helper() 0 78 10
A utf8_encode() 0 32 6
A normalize_msword() 0 20 4
C str_detect_encoding() 0 116 14
A spaces_to_tabs() 0 3 1
A str_istarts_with() 0 11 4
A is_blank() 0 3 1
A str_replace() 0 3 1
A substr_iright() 0 19 5
D getCharDirection() 0 114 119
A htmlspecialchars() 0 7 3
A replace() 0 7 2
A filter_var_array() 0 9 2
A __construct() 0 3 1
A decimal_to_chr() 0 3 1
A to_iso8859() 0 16 4
A words_limit() 0 21 5
A strip_tags() 0 11 3
A pcre_utf8_support() 0 4 1
A between() 0 24 5
A str_isubstr_before_last_separator() 0 16 4
A str_truncate_safe() 0 29 6
A codepoints() 0 29 4
A substr_right() 0 19 5
A lowerCaseFirst() 0 3 1
B str_split() 0 39 8
A str_ends_with_any() 0 13 4
A chr_map() 0 5 1
A strrpos_in_byte() 0 16 5
A cleanup() 0 28 2
F strrpos() 0 116 27
A remove_right() 0 12 2
A remove_html_breaks() 0 3 1
A showSupport() 0 11 3
A char_at() 0 3 1
A remove_invisible_characters() 0 19 3
A single_chr_html_encode() 0 19 6
A chars() 0 3 1
A str_replace_last() 0 8 2
A str_iindex_last() 0 7 1
A str_substr_before_last_separator() 0 20 4
C is_binary() 0 48 12
A intlChar_loaded() 0 3 1
A strtocasefold() 0 20 3
A lcfirst() 0 14 2
A tabs_to_spaces() 0 3 1
A finfo_loaded() 0 3 1
A str_truncate() 0 20 3
F strripos() 0 91 20
A strpos_in_byte() 0 16 5
A str_ends_with() 0 7 3
A fits_inside() 0 3 1
F to_ascii() 0 150 28
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A reduce_string_array() 0 26 6
A str_longest_common_prefix() 0 16 3
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 3 1
A str_substr_first() 0 21 4
A html_stripe_empty_tags() 0 6 1
A chr_size_list() 0 22 4
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 17 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 20 4
A str_isubstr_after_first_separator() 0 20 4
D extract_text() 0 109 20
A json_loaded() 0 3 1
A isBom() 0 3 1
A str_snakeize() 0 38 2
A int_to_chr() 0 3 1
A is_lowercase() 0 7 2
A str_sort() 0 15 3
F to_utf8() 0 93 32
A ucword() 0 3 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A hasBom() 0 3 1
A toAscii() 0 3 1
A str_capitalize_name() 0 8 1
A str_limit_after_word() 0 30 6
A iconv_loaded() 0 3 2
B lcwords() 0 37 8
A str_upper_first() 0 3 1
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 5
A filter_var() 0 9 2
A substr_ileft() 0 19 5
A is_empty() 0 3 1
B html_encode() 0 39 7
A str_dasherize() 0 3 1
A isUtf16() 0 3 1
A str_ensure_left() 0 7 2
F encode() 0 135 42
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C is_utf32() 0 61 15
C ord() 0 56 15
A is_alphanumeric() 0 3 1
A strtonatfold() 0 4 1
A json_decode() 0 16 3
A fix_simple_utf8() 0 20 4
B strcspn() 0 23 7
A checkForSupport() 0 42 4
A is_json() 0 24 6
A fixStrCaseHelper() 0 34 5
A int_to_hex() 0 7 2
B str_split_pattern() 0 31 7
F strstr() 0 87 19
A has_lowercase() 0 3 1
A json_encode() 0 16 3
A str_isubstr_first() 0 21 4
A is_base64() 0 13 4
A str_last_char() 0 9 3
A hex_to_int() 0 14 3
A htmlentities() 0 19 3
A hex_to_chr() 0 3 1
A str_substr_before_first_separator() 0 20 4
F substr() 0 137 30
A isJson() 0 3 1
A wordwrap_per_line() 0 15 3
A str_surround() 0 3 1
A strncmp() 0 6 1
A filter_input_array() 0 9 2
A str_insert() 0 12 2
A getSupportInfo() 0 15 4
A utf8_fix_win1252_chars() 0 3 1
B replace_diamond_question_mark() 0 45 7
A chr_to_hex() 0 11 3
A str_delimit() 0 9 1
A to_utf8_convert_helper() 0 27 5
B strtoupper() 0 48 10
A min() 0 14 3
A collapse_whitespace() 0 4 1
D html_entity_decode() 0 87 18
A str_starts_with_any() 0 17 5
B strrichr() 0 49 11
D split() 0 110 24
A str_istarts_with_any() 0 17 5
A str_contains_any() 0 13 4
A remove_duplicates() 0 14 4
A str_slice() 0 13 5
A access() 0 11 3
F strpos() 0 134 31
A str_shuffle() 0 12 2
A strcmp() 0 6 2
B file_get_contents() 0 55 10
A str_word_count() 0 29 5
A strripos_in_byte() 0 16 5
A str_to_binary() 0 5 1
A callback() 0 3 1
B symfony_polyfill_used() 0 16 7
A binary_to_str() 0 12 3
A bom() 0 3 1
B str_to_words() 0 33 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
15
  // This regular expression is a work around for http://bugs.exim.org/1279
16
  const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
17
18
  /**
   * Bom => Byte-Length
   *
   * Every BOM is listed twice: once as its raw bytes and once in the form it
   * takes when shown as "WINDOWS-1252" text (one char may then need more than
   * one byte, which is why those lengths are larger).
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = [
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
      // UTF-8 BOM as "WINDOWS-1252": U+00EF U+00BB U+00BF, two bytes each.
      // Written as byte escapes so the key cannot be stripped or re-encoded by tooling.
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6,
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  ];
37
38
  /**
39
   * Numeric code point => UTF-8 Character
40
   *
41
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
42
   *
43
   * @var array
44
   */
45
  private static $WHITESPACE = [
46
    // NUL Byte
47
    0     => "\x0",
48
    // Tab
49
    9     => "\x9",
50
    // New Line
51
    10    => "\xa",
52
    // Vertical Tab
53
    11    => "\xb",
54
    // Carriage Return
55
    13    => "\xd",
56
    // Ordinary Space
57
    32    => "\x20",
58
    // NO-BREAK SPACE
59
    160   => "\xc2\xa0",
60
    // OGHAM SPACE MARK
61
    5760  => "\xe1\x9a\x80",
62
    // MONGOLIAN VOWEL SEPARATOR
63
    6158  => "\xe1\xa0\x8e",
64
    // EN QUAD
65
    8192  => "\xe2\x80\x80",
66
    // EM QUAD
67
    8193  => "\xe2\x80\x81",
68
    // EN SPACE
69
    8194  => "\xe2\x80\x82",
70
    // EM SPACE
71
    8195  => "\xe2\x80\x83",
72
    // THREE-PER-EM SPACE
73
    8196  => "\xe2\x80\x84",
74
    // FOUR-PER-EM SPACE
75
    8197  => "\xe2\x80\x85",
76
    // SIX-PER-EM SPACE
77
    8198  => "\xe2\x80\x86",
78
    // FIGURE SPACE
79
    8199  => "\xe2\x80\x87",
80
    // PUNCTUATION SPACE
81
    8200  => "\xe2\x80\x88",
82
    // THIN SPACE
83
    8201  => "\xe2\x80\x89",
84
    //HAIR SPACE
85
    8202  => "\xe2\x80\x8a",
86
    // LINE SEPARATOR
87
    8232  => "\xe2\x80\xa8",
88
    // PARAGRAPH SEPARATOR
89
    8233  => "\xe2\x80\xa9",
90
    // NARROW NO-BREAK SPACE
91
    8239  => "\xe2\x80\xaf",
92
    // MEDIUM MATHEMATICAL SPACE
93
    8287  => "\xe2\x81\x9f",
94
    // IDEOGRAPHIC SPACE
95
    12288 => "\xe3\x80\x80",
96
  ];
97
98
  /**
99
   * @var array
100
   */
101
  private static $WHITESPACE_TABLE = [
102
      'SPACE'                     => "\x20",
103
      'NO-BREAK SPACE'            => "\xc2\xa0",
104
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
105
      'EN QUAD'                   => "\xe2\x80\x80",
106
      'EM QUAD'                   => "\xe2\x80\x81",
107
      'EN SPACE'                  => "\xe2\x80\x82",
108
      'EM SPACE'                  => "\xe2\x80\x83",
109
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
110
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
111
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
112
      'FIGURE SPACE'              => "\xe2\x80\x87",
113
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
114
      'THIN SPACE'                => "\xe2\x80\x89",
115
      'HAIR SPACE'                => "\xe2\x80\x8a",
116
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
117
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
118
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
119
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
120
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
121
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
122
  ];
123
124
  /**
125
   * bidirectional text chars
126
   *
127
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
128
   *
129
   * @var array
130
   */
131
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
132
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
133
    8234 => "\xE2\x80\xAA",
134
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
135
    8235 => "\xE2\x80\xAB",
136
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
137
    8236 => "\xE2\x80\xAC",
138
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
139
    8237 => "\xE2\x80\xAD",
140
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
141
    8238 => "\xE2\x80\xAE",
142
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
143
    8294 => "\xE2\x81\xA6",
144
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
145
    8295 => "\xE2\x81\xA7",
146
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
147
    8296 => "\xE2\x81\xA8",
148
    // POP DIRECTIONAL ISOLATE
149
    8297 => "\xE2\x81\xA9",
150
  ];
151
152
  /**
153
   * @var array
154
   */
155
  private static $COMMON_CASE_FOLD = [
156
      'upper' => [
157
          'µ',
158
          'ſ',
159
          "\xCD\x85",
160
          'ς',
161
          'ẞ',
162
          "\xCF\x90",
163
          "\xCF\x91",
164
          "\xCF\x95",
165
          "\xCF\x96",
166
          "\xCF\xB0",
167
          "\xCF\xB1",
168
          "\xCF\xB5",
169
          "\xE1\xBA\x9B",
170
          "\xE1\xBE\xBE",
171
      ],
172
      'lower' => [
173
          'μ',
174
          's',
175
          'ι',
176
          'σ',
177
          'ß',
178
          'β',
179
          'θ',
180
          'φ',
181
          'π',
182
          'κ',
183
          'ρ',
184
          'ε',
185
          "\xE1\xB9\xA1",
186
          'ι',
187
      ],
188
  ];
189
190
191
  /**
192
   * @var array
193
   */
194
  private static $SUPPORT = [];
195
196
  /**
197
   * @var null|array
198
   */
199
  private static $UTF8_MSWORD;
200
201
  /**
202
   * @var null|array
203
   */
204
  private static $BROKEN_UTF8_FIX;
205
206
  /**
207
   * @var null|array
208
   */
209
  private static $WIN1252_TO_UTF8;
210
211
  /**
212
   * @var null|array
213
   */
214
  private static $ENCODINGS;
215
216
  /**
217
   * @var null|array
218
   */
219
  private static $ORD;
220
221
  /**
222
   * @var null|array
223
   */
224
  private static $CHR;
225
226
  /**
   * Instantiating the helper runs the environment support check.
   *
   * The check itself is guarded, so it only does work the first time.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
233
234
  /**
   * Fetch one character by position, similar to $str[1] but multi-byte aware.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>Zero-based position of the wanted character.</p>
   *
   * @return string The single (multi-byte) character, or an empty string when
   *                $str is empty or $pos is negative.
   */
  public static function access(string $str, int $pos): string
  {
    // Nothing can be picked from an empty string or from a negative position.
    if ($str === '' || $pos < 0) {
      return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string)$char;
  }
254
255
  /**
   * Make sure a string starts with the UTF-8 BOM.
   *
   * INFO: A string that already carries a BOM is handed back untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string, guaranteed to begin with a BOM.
   */
  public static function add_bom_to_string(string $str): string
  {
    // Already marked: do not stack a second BOM in front.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
272
273
  /**
   * Adds the specified amount of left and right padding to the given string.
   * The default character used is a space.
   *
   * The pad type is picked from which sides were requested: only left, only
   * right, or both (also used when neither side is requested, in which case the
   * target length equals the current length).
   *
   * @param string $str
   * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
   * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
   * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // $encoding now has the documented default: a required parameter after
    // optional ones is deprecated and made the earlier defaults unusable.

    // The missing side is always 0, so one sum covers every case.
    $length = $left + $right + self::strlen($str, $encoding);

    if ($left !== 0 && $right === 0) {
      $type = STR_PAD_LEFT;
    } elseif ($left === 0 && $right !== 0) {
      $type = STR_PAD_RIGHT;
    } else {
      $type = STR_PAD_BOTH;
    }

    return self::str_pad($str, $length, $padStr, $type, $encoding);
  }
305
306
  /**
   * Changes the case of all keys in an array.
   *
   * Values are copied over unchanged. If two keys become equal after the case
   * change, the later entry overwrites the earlier one.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other value
   *                     is treated as CASE_LOWER.</p>
   *
   * @return array The input values under lower- or upper-cased keys.
   */
  public static function array_change_key_case(array $array, int $case = CASE_LOWER): array
  {
    // Only CASE_UPPER selects upper-casing; everything else falls back to lower.
    $toUpper = ($case === CASE_UPPER);

    $return = [];
    foreach ($array as $key => $value) {
      // Array keys may be integers; hand the case helpers a real string so
      // strict typing cannot reject them. PHP turns numeric strings back into
      // integer keys on assignment, so such keys end up unchanged.
      $key = (string)$key;

      if ($toUpper) {
        $key = self::strtoupper($key);
      } else {
        $key = self::strtolower($key);
      }

      $return[$key] = $value;
    }

    return $return;
  }
338
339
  /**
   * Extract the text enclosed by two delimiters.
   *
   * The start delimiter is searched from $offset; the end delimiter is searched
   * from directly behind the start delimiter. An empty string is returned when
   * either delimiter is missing or when nothing lies between them.
   *
   * @param string $str
   * @param string $start    <p>Delimiter marking the start of the substring.</p>
   * @param string $end      <p>Delimiter marking the end of the substring.</p>
   * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
  {
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
      return '';
    }

    // First character behind the start delimiter.
    $contentFrom = $startPos + self::strlen($start, $encoding);

    $endPos = self::strpos($str, $end, $contentFrom, $encoding);
    if ($endPos === false) {
      return '';
    }

    // End delimiter directly follows the start delimiter: nothing in between.
    if ($endPos === $contentFrom) {
      return '';
    }

    $between = self::substr($str, $contentFrom, $endPos - $contentFrom, $encoding);

    return $between === false ? '' : $between;
  }
377
378
  /**
   * Convert a binary digit string (e.g. "1100001") into the bytes it encodes.
   *
   * @param mixed $bin <p>String consisting of the digits 1 and 0.</p>
   *
   * @return string The decoded bytes; an empty string for empty input or when
   *                the value converts to zero.
   */
  public static function binary_to_str($bin): string
  {
    if (!isset($bin[0])) {
      return '';
    }

    $hex = \base_convert($bin, 2, 16);
    if ($hex === '0') {
      return '';
    }

    // base_convert() drops leading zeros, so the hex string can have an odd
    // number of digits. pack('H*') would then treat the last digit as the high
    // nibble of a padded byte (e.g. "a" => "\xa0"); left-pad to whole bytes.
    if ((\strlen($hex) & 1) === 1) {
      $hex = '0' . $hex;
    }

    return \pack('H*', $hex);
  }
398
399
  /**
   * Returns the three-byte UTF-8 Byte Order Mark.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom(): string
  {
    return "\xef\xbb\xbf";
  }
410
411
  /**
   * Alias of UTF8::chr_map(): run a callback over every character of a string.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return string[]
   */
  public static function callback($callback, string $str): array
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
425
426
  /**
   * Pick the character located at $index (indexes start at 0).
   *
   * @param string $str
   * @param int    $index    <p>Position of the character.</p>
   * @param string $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return string The character at $index.
   */
  public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // A one-character slice; whatever substr() yields is forced to a string.
    $char = self::substr($str, $index, 1, $encoding);

    return (string)$char;
  }
439
440
  /**
   * Break a string up into its individual characters.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string[] An array of chars.
   */
  public static function chars(string $str): array
  {
    // Splitting with a chunk length of 1 gives one entry per character.
    $chars = self::str_split($str, 1);

    return $chars;
  }
451
452
  /**
   * Detects once which UTF-8 related PHP features are available and stores the
   * findings in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // The marker key is set on the first run; later calls have nothing to do.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    self::$SUPPORT['intl__transliterator_list_ids'] = [];

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

    // The transliterator id list is only filled in when intl provides it.
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              && \function_exists('transliterator_list_ids') === true;
    if ($canListTransliterators) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
501
502
  /**
   * Builds the character that belongs to a code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per code point and encoding for the lifetime of the
   * process. Code points up to 127 come from the lookup table, higher ones from
   * IntlChar when it is available and otherwise from manual UTF-8 byte assembly.
   *
   * @param int|string $code_point <p>The code point for which to generate a character.</p>
   * @param string     $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null Multi-Byte character, returns null on failure or empty input.
   */
  public static function chr($code_point, string $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Only these encodings are accepted silently when mbstring is missing.
    $worksWithoutMbstring = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
    if (!$worksWithoutMbstring && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // Loads the lookup table on first use; it is left as-is when no data comes back.
    $loadChrTable = static function () {
      if (self::$CHR === null) {
        $chrTmp = self::getData('chr');
        if ($chrTmp) {
          self::$CHR = (array)$chrTmp;
        }
      }
    };

    if ($code_point <= 127) { // use "simple"-char only until "\x80"
      $loadChrTable();
      $chr = self::$CHR[$code_point];
    } elseif (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $chr = \IntlChar::chr($code_point);
    } else {
      $loadChrTable();

      // Assemble the UTF-8 byte sequence by hand: lead byte first, then one
      // continuation byte per remaining 6-bit group.
      $cp = (int)$code_point;
      if ($cp <= 0x7F) {
        $chr = self::$CHR[$cp];
      } elseif ($cp <= 0x7FF) {
        $chr = self::$CHR[($cp >> 6) + 0xC0] .
               self::$CHR[($cp & 0x3F) + 0x80];
      } elseif ($cp <= 0xFFFF) {
        $chr = self::$CHR[($cp >> 12) + 0xE0] .
               self::$CHR[(($cp >> 6) & 0x3F) + 0x80] .
               self::$CHR[($cp & 0x3F) + 0x80];
      } else {
        $chr = self::$CHR[($cp >> 18) + 0xF0] .
               self::$CHR[(($cp >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($cp >> 6) & 0x3F) + 0x80] .
               self::$CHR[($cp & 0x3F) + 0x80];
      }
    }

    // Everything above produced UTF-8; convert only when something else was asked for.
    if ($encoding !== 'UTF-8') {
      $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
  }
601
602
  /**
   * Runs a callback once per character and collects the results.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return string[] The outcome of callback.
   */
  public static function chr_map($callback, string $str): array
  {
    return \array_map($callback, self::split($str));
  }
616
617
  /**
   * Lists, character by character, how many bytes each one occupies.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original unicode string.</p>
   *
   * @return int[] An array of byte lengths of each character.
   */
  public static function chr_size_list(string $str): array
  {
    if ($str === '') {
      return [];
    }

    $chars = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Without function overloading the native strlen() can be used directly.
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \array_map('\strlen', $chars);
    }

    // With overloading active, byte counting goes through the dedicated helper.
    $byteLength = function ($data) {
      return self::strlen_in_byte($data);
    };

    return \array_map($byteLength, $chars);
  }
652
653
  /**
   * Decodes the first character of $char into its decimal code.
   *
   * The lead byte announces the sequence length; its marker bits are cleared
   * and each following byte contributes six more bits.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal(string $char): int
  {
    $code = self::ord($char[0]);

    // 0xxxxxxx
    if (($code & 0x80) === 0) {
      return $code;
    }

    // Stays 1 when the lead byte matches none of the patterns below.
    $length = 1;

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $code &= ~0xf0;
    }

    for ($offset = 1; $offset < $length; ++$offset) {
      // 10xxxxxx
      $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $code;
  }
691
692
  /**
   * Formats the code point of a character in hexadecimal notation (U+xxxx).
   *
   * @param string|int $char <p>The input character</p>
   * @param string     $pfix [optional]
   *
   * @return string The code point encoded as U+xxxx; an empty string for empty input.
   */
  public static function chr_to_hex($char, string $pfix = 'U+'): string
  {
    if ($char === '') {
      return '';
    }

    // The entity "&#0;" is looked up as an empty character.
    $lookup = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($lookup);

    return self::int_to_hex($codePoint, $pfix);
  }
712
713
  /**
   * Alias of UTF8::chr_to_decimal(): decimal code of a character.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int(string $chr): int
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
726
727
  /**
   * Cuts a string into chunks and joins them with a line ending.
   *
   * The separator is placed between chunks only; none is added after the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
   *
   * @return string The chunked string.
   */
  public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
  {
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
  }
740
741
  /**
   * Strips bytes that are not part of a UTF-8 shaped sequence and applies the optional extra clean-ups.
   *
   * The optional steps run in this fixed order: diamond question mark, invisible characters,
   * whitespace, MS Word characters, BOM.
   *
   * @param string $str                           <p>The string to be sanitized.</p>
   * @param bool   $remove_bom                    [optional] <p>Set to true, to remove the UTF-BOM.</p>
   * @param bool   $normalize_whitespace          [optional] <p>Set to true, to normalize the whitespace.</p>
   * @param bool   $normalize_msword              [optional] <p>Set to true, to normalize MS Word chars
   *                                              e.g.: "…" => "..."</p>
   * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, only
   *                                              used in combination with $normalize_whitespace</p>
   * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, to remove the diamond question
   *                                              mark e.g.: "�"</p>
   * @param bool   $remove_invisible_characters   [optional] <p>Set to false, to keep invisible
   *                                              characters e.g.: "\0"</p>
   *
   * @return string Clean UTF-8 encoded string.
   */
  public static function clean(
      string $str,
      bool $remove_bom = false,
      bool $normalize_whitespace = false,
      bool $normalize_msword = false,
      bool $keep_non_breaking_space = false,
      bool $replace_diamond_question_mark = false,
      bool $remove_invisible_characters = true
  ): string
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences, groups 2 and 3 capture single stray bytes.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // Every match is replaced by group 1 only, so stray bytes (groups 2 + 3) vanish.
    $cleaned = (string)\preg_replace($pattern, '$1', $str);

    if ($replace_diamond_question_mark) {
      $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
      $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
809
810
  /**
   * Cleans up a string: runs UTF8::fix_simple_utf8() first and UTF8::clean() afterwards.
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return string The cleaned string, or an empty string for empty input.
   */
  public static function cleanup($str): string
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($input);

    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // ... but do not touch MS Word chars
        true,  // keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
  }
846
847
  /**
   * Converts a string (or an array of characters) into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg     <p>A string, which is split via UTF8::split(), or an array whose
   *                                 elements are passed to UTF8::ord() one by one.</p>
   * @param bool            $u_style <p>If true, every code point is additionally passed through
   *                                 UTF8::int_to_hex(), otherwise the result of UTF8::ord() is returned.</p>
   *
   * @return array<int|string>
   *                           The array of code points:<br>
   *                           array<int> for $u_style === false<br>
   *                           array<string> for $u_style === true<br>
   */
  public static function codepoints($arg, bool $u_style = false): array
  {
    $chars = \is_string($arg) ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);

    if ($codePoints === []) {
      return [];
    }

    if (!$u_style) {
      return $codePoints;
    }

    return \array_map([self::class, 'int_to_hex'], $codePoints);
  }
891
892
  /**
   * Replaces every run matching "[[:space:]]+" with a single space and trims the result.
   *
   * The replacement is done via UTF8::regex_replace(), the trimming via UTF8::trim().
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String with a trimmed $str and condensed whitespace.
   */
  public static function collapse_whitespace(string $str): string
  {
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
  }
907
908
  /**
   * Counts how often each character occurs in a string.
   *
   * The string is split into chunks of length 1 via UTF8::split() and the chunks are counted
   * with "array_count_values()".
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed on to UTF8::split() as its third argument.</p>
   *
   * @return int[] An associative array of Character as keys and
   *               their count as values.
   */
  public static function count_chars(string $str, bool $cleanUtf8 = false): array
  {
    $characters = self::split($str, 1, $cleanUtf8);

    return \array_count_values($characters);
  }
921
922
  /**
   * Removes css "@media ... { ... }" blocks from a string.
   *
   * @param string $str <p>The css input.</p>
   *
   * @return string The input without the matched media-query blocks.
   */
  public static function css_stripe_media_queries(string $str): string
  {
    // modifiers: multiline, case-insensitive, dot matches newline, ungreedy
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    return (string)$stripped;
  }
937
938
  /**
   * Reports whether the "ctype" extension is loaded.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function ctype_loaded(): bool
  {
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
  }
948
949
  /**
   * Converts a int-value into a character by decoding the numeric html-entity "&#<int>;".
   *
   * @param mixed $int <p>The value that is placed into the numeric entity.</p>
   *
   * @return string The result of UTF8::html_entity_decode() for that entity.
   */
  public static function decimal_to_chr($int): string
  {
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, ENT_QUOTES | ENT_HTML5);
  }
960
961
  /**
   * Decodes a MIME header field.
   *
   * "iconv_mime_decode()" is used if iconv is flagged as supported, otherwise the string is
   * handed to "mb_decode_mimeheader()".
   *
   * @param string $str
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string|false
   *                      A decoded MIME field on success,
   *                      or false if an error occurs during the decoding.
   */
  public static function decode_mimeheader($str, $encoding = 'UTF-8')
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // fallback without iconv
    if (self::$SUPPORT['iconv'] !== true) {
      $input = ($encoding != 'UTF-8') ? self::encode($encoding, $str) : $str;

      return \mb_decode_mimeheader($input);
    }

    return \iconv_mime_decode($str, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
  }
991
992
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The pseudo-encodings "BASE64", "HTML" and "HTML-ENTITIES" are handled before any charset conversion.
   * The generic conversion uses "mb_convert_encoding()" if mbstring is flagged as supported and falls
   * back to "iconv()" (with "//IGNORE") otherwise; if both fail the input is returned unchanged.
   *
   * @param string      $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string      $str                    <p>The input string</p>
   * @param bool        $autodetectFromEncoding [optional] <p>If true, the encoding of $str is always detected via
   *                                            UTF8::str_detect_encoding() and a detected encoding overrides
   *                                            $fromEncoding; if nothing can be detected the result of
   *                                            UTF8::to_utf8() is returned.<br>If false, the detection is only
   *                                            used when $fromEncoding is empty.</p>
   * @param string|null $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc. ... otherwise
   *                                            we will autodetect the encoding anyway</p>
   *
   * @return string
   */
  public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = null): string
  {
    if ('' === $str || '' === $toEncoding) {
      return $str;
    }

    if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
      $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
    }

    if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
      $fromEncoding = self::normalize_encoding($fromEncoding, null);
    }

    // nothing to do: source and target encoding are the same
    if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
      return $str;
    }

    // Done once here; every later access to self::$SUPPORT in this method relies on it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ('BASE64' === $fromEncoding) {
      // "base64_decode()" may return false, keep $str a string for the typed calls below
      $str = (string)\base64_decode($str);
      $fromEncoding = null;
    }

    if ('BASE64' === $toEncoding) {
      return \base64_encode($str);
    }

    if ('HTML-ENTITIES' === $toEncoding || 'HTML' === $toEncoding) {

      if ('HTML-ENTITIES' === $fromEncoding || 'HTML' === $fromEncoding) {
        $fromEncoding = null;
      }

      if ('UTF-8' !== $fromEncoding) {
        $str = self::encode('UTF-8', $str, false, $fromEncoding);
      }

      return self::html_encode($str, true, $toEncoding);
    }

    if ('HTML-ENTITIES' === $fromEncoding) {
      $str = self::html_entity_decode($str, ENT_COMPAT, 'UTF-8');
      $fromEncoding = 'UTF-8';
    }

    $fromEncodingDetected = false;
    if (
        $autodetectFromEncoding === true
        ||
        !$fromEncoding
    ) {
      $fromEncodingDetected = self::str_detect_encoding($str);
    }

    // DEBUG
    //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");

    if ($fromEncodingDetected !== false) {
      // a detected encoding always wins over the given one
      $fromEncoding = $fromEncodingDetected;
    } elseif ($autodetectFromEncoding === true) {
      // fallback for the "autodetect"-mode
      return self::to_utf8($str);
    }

    if (
        !$fromEncoding
        ||
        $fromEncoding === $toEncoding
    ) {
      return $str;
    }

    if (
        $toEncoding === 'UTF-8'
        &&
        (
            $fromEncoding === 'WINDOWS-1252'
            ||
            $fromEncoding === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $toEncoding === 'ISO-8859-1'
        &&
        (
            $fromEncoding === 'WINDOWS-1252'
            ||
            $fromEncoding === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    if (
        $toEncoding !== 'UTF-8'
        &&
        $toEncoding !== 'ISO-8859-1'
        &&
        $toEncoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      //
      // The real source encoding must be passed as third argument: using the target encoding
      // as "from"-encoding (as done before in the autodetect-mode) does not convert anything.
      $strEncoded = \mb_convert_encoding(
          $str,
          $toEncoding,
          $fromEncoding
      );

      if ($strEncoded) {
        return $strEncoded;
      }
    }

    $return = \iconv($fromEncoding, $toEncoding . '//IGNORE', $str);
    if ($return !== false) {
      return $return;
    }

    return $str;
  }
1144
1145
  /**
   * Encodes a string as MIME header field value via "iconv_mime_encode()" (with an empty field name).
   *
   * @param string $str
   * @param string $fromCharset      [optional] <p>Set the input charset.</p>
   * @param string $toCharset        [optional] <p>Set the output charset.</p>
   * @param string $transferEncoding [optional] <p>Set the transfer encoding ("scheme").</p>
   * @param string $linefeed         [optional] <p>Set the used linefeed ("line-break-chars").</p>
   * @param int    $indent           [optional] <p>Set the max length indent ("line-length").</p>
   *
   * @return string|false
   *                      An encoded MIME field on success,
   *                      or false if an error occurs during the encoding.
   */
  public static function encode_mimeheader(
      $str,
      $fromCharset = 'UTF-8',
      $toCharset = 'UTF-8',
      $transferEncoding = 'Q',
      $linefeed = "\r\n",
      $indent = 76
  )
  {
    $isKnownFromCharset = ($fromCharset === 'UTF-8' || $fromCharset === 'CP850');
    if (!$isKnownFromCharset) {
      $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
    }

    $isKnownToCharset = ($toCharset === 'UTF-8' || $toCharset === 'CP850');
    if (!$isKnownToCharset) {
      $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
    }

    $preferences = [
        'scheme'           => $transferEncoding,
        'line-length'      => $indent,
        'input-charset'    => $fromCharset,
        'output-charset'   => $toCharset,
        'line-break-chars' => $linefeed,
    ];

    return \iconv_mime_encode('', $str, $preferences);
  }
1188
1189
  /**
   * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
   *
   * Without a search-string the extract is cut from the beginning of $str. The cut positions are
   * looked up via the next " " / "." and skipped text is marked with $replacerForSkippedText.
   *
   * @param string   $str                    <p>The input string.</p>
   * @param string   $search                 <p>The searched string, an empty string means "no search"
   *                                         (a "0" is a valid search-string).</p>
   * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
   * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
   * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    // Measured once with the given encoding and reused for every offset / length below.
    $strLength = (int)self::strlen($str, $encoding);

    if ($length === null) {
      $length = (int)\round($strLength / 2, 0);
    }

    // Strict check instead of "empty()": the search-string "0" must not be handled as "no search".
    if ('' === $search) {

      $end = 0;
      if ($length > 0) {
        $end = \min($length - 1, $strLength);
      }

      // NOTE: "min()" returns false as soon as one of the two lookups fails,
      //       the cast turns that into 0 and the whole string is returned.
      $pos = (int)\min(
          self::strpos($str, ' ', $end, $encoding),
          self::strpos($str, '.', $end, $encoding)
      );

      if ($pos) {
        $strSub = self::substr($str, 0, $pos, $encoding);
        if ($strSub === false) {
          return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
      }

      return $str;
    }

    $wordPos = self::stripos($str, $search, 0, $encoding);
    $halfSide = (int)((int)$wordPos - $length / 2 + (int)self::strlen($search, $encoding) / 2);

    $pos_start = 0;
    if ($halfSide > 0) {
      $halfText = self::substr($str, 0, $halfSide, $encoding);
      if ($halfText !== false) {
        $pos_start = (int)\max(
            self::strrpos($halfText, ' ', 0, $encoding),
            self::strrpos($halfText, '.', 0, $encoding)
        );
      }
    }

    // A match at position 0 (and "not found") is handled by the "cut from the beginning" branch.
    if ($wordPos !== false && $wordPos !== 0 && $halfSide > 0) {
      $l = \min($pos_start + $length - 1, $strLength);

      $pos_end = (int)\min(
              self::strpos($str, ' ', $l, $encoding),
              self::strpos($str, '.', $l, $encoding)
          ) - $pos_start;

      if ($pos_end <= 0) {
        // no usable end position: take everything from the start position to the end of the string
        $strSub = self::substr($str, $pos_start, $strLength, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
        } else {
          $extract = '';
        }
      } else {
        $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      }

    } else {

      $l = \min($length - 1, $strLength);

      $pos_end = \min(
          self::strpos($str, ' ', $l, $encoding),
          self::strpos($str, '.', $l, $encoding)
      );

      if ($pos_end) {
        $strSub = self::substr($str, 0, $pos_end, $encoding);
        if ($strSub !== false) {
          $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        } else {
          $extract = '';
        }
      } else {
        $extract = $str;
      }
    }

    return $extract;
  }
1310
1311
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * INFO: the filename is passed through "filter_var()" with FILTER_SANITIZE_STRING before it is used,
   *       if nothing usable is left, false is returned without touching the filesystem.
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename         <p>
   *                                        Name of the file to read.
   *                                        </p>
   * @param bool          $use_include_path [optional] <p>
   *                                        Prior to PHP 5, this parameter is called
   *                                        use_include_path and is a bool.
   *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
   *                                        to trigger include path
   *                                        search.
   *                                        </p>
   * @param resource|null $context          [optional] <p>
   *                                        A valid context resource created with
   *                                        stream_context_create. If it is null and $timeout is set,
   *                                        a context with that "http" timeout is created.
   *                                        </p>
   * @param int|null      $offset           [optional] <p>
   *                                        The offset where the reading starts.
   *                                        </p>
   * @param int|null      $maxLength        [optional] <p>
   *                                        Maximum length of data read. The default is to read until end
   *                                        of file is reached.
   *                                        </p>
   * @param int           $timeout          <p>The time in seconds for the timeout.</p>
   *
   * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g.
   *                                        images or pdf, because they used non default utf-8 chars.</p>
   * @param string|null   $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc. ... otherwise we
   *                                        will autodetect the encoding</p>
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents(
      string $filename,
      bool $use_include_path = false,
      $context = null,
      int $offset = null,
      int $maxLength = null,
      int $timeout = 10,
      bool $convertToUtf8 = true,
      string $fromEncoding = ''
  )
  {
    // init
    $filename = \filter_var($filename, FILTER_SANITIZE_STRING);

    // "filter_var()" can fail (false) or strip the whole name (''): report that as a normal
    // read-failure instead of passing an unusable path to "file_get_contents()".
    if ($filename === false || $filename === '') {
      return false;
    }

    if ($timeout && $context === null) {
      $context = \stream_context_create(
          [
              'http' =>
                  [
                      'timeout' => $timeout,
                  ],
          ]
      );
    }

    if ($offset === null) {
      $offset = 0;
    }

    // the length argument is only passed if it was really given
    if (\is_int($maxLength) === true) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // only for non binary, but also for UTF-16 or UTF-32
      if (
          self::is_binary($data, true) !== true
          ||
          self::is_utf16($data) !== false
          ||
          self::is_utf32($data) !== false
      ) {
        $data = self::encode('UTF-8', $data, false, $fromEncoding);
        $data = self::cleanup($data);
      }
    }

    return $data;
  }
1405
1406
  /**
   * Reads a file and checks its content via UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @throws \RuntimeException if file_get_contents() returned false
   *
   * @return bool
   *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.
   */
  public static function file_has_bom(string $file_path): bool
  {
    $content = \file_get_contents($file_path);

    if ($content !== false) {
      return self::string_has_bom($content);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
  }
1425
1426
  /**
   * Normalizes strings to the given normalization form, arrays and objects are processed recursively.
   *
   * Non-ASCII strings that are not normalized yet are passed to "Normalizer::normalize()", if that
   * yields nothing usable the string is re-encoded via UTF8::encode('UTF-8', ...) instead.
   * Values of any other type are returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form that is passed to the "Normalizer" methods.</p>
   * @param string $leading_combining  <p>Prefix for strings that start with a "\p{Mn}" character.</p>
   *
   * @return mixed
   */
  public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
  {
    $type = \gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (\strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = self::normalize_line_ending($var);
    }

    // plain ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder, only used by the "isset()" check below
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($normalized[0], $leading_combining[0])
                               &&
                               \preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1489
1490
  /**
   * "filter_input()"-wrapper, the fetched value is additionally passed through UTF8::filter().
   *
   * Gets a specific external variable by name and optionally filters it
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              It is only forwarded if the caller really passed a fourth argument.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
   *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   */
  public static function filter_input(int $type, string $variable_name, int $filter = FILTER_DEFAULT, $options = null)
  {
    $value = (\func_num_args() < 4)
        ? \filter_input($type, $variable_name, $filter)
        : \filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1528
1529
  /**
1530
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1531
   *
1532
   * Gets external variables and optionally filters them
1533
   *
1534
   * @link  http://php.net/manual/en/function.filter-input-array.php
1535
   *
1536
   * @param int   $type       <p>
1537
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1538
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1539
   *                          <b>INPUT_ENV</b>.
1540
   *                          </p>
1541
   * @param mixed $definition [optional] <p>
1542
   *                          An array defining the arguments. A valid key is a string
1543
   *                          containing a variable name and a valid value is either a filter type, or an array
1544
   *                          optionally specifying the filter, flags and options. If the value is an
1545
   *                          array, valid keys are filter which specifies the
1546
   *                          filter type,
1547
   *                          flags which specifies any flags that apply to the
1548
   *                          filter, and options which specifies any options that
1549
   *                          apply to the filter. See the example below for a better understanding.
1550
   *                          </p>
1551
   *                          <p>
1552
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1553
   *                          input array are filtered by this filter.
1554
   *                          </p>
1555
   * @param bool  $add_empty  [optional] <p>
1556
   *                          Add missing keys as <b>NULL</b> to the return value.
1557
   *                          </p>
1558
   *
1559
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1560
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set. Or
1561
   *               if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable is not
1562
   *               set and <b>NULL</b> if the filter fails.
1563
   */
1564
  public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
  {
    // Pass $definition / $add_empty on to the native function only when the
    // caller supplied at least a definition; otherwise call it with $type alone.
    $filtered = (\func_num_args() < 2)
        ? \filter_input_array($type)
        : \filter_input_array($type, $definition, $add_empty);

    return self::filter($filtered);
  }
1574
1575
  /**
1576
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1577
   *
1578
   * Filters a variable with a specified filter
1579
   *
1580
   * @link  http://php.net/manual/en/function.filter-var.php
1581
   *
1582
   * @param mixed $variable <p>
1583
   *                        Value to filter.
1584
   *                        </p>
1585
   * @param int   $filter   [optional] <p>
1586
   *                        The ID of the filter to apply. The
1587
   *                        manual page lists the available filters.
1588
   *                        </p>
1589
   * @param mixed $options  [optional] <p>
1590
   *                        Associative array of options or bitwise disjunction of flags. If filter
1591
   *                        accepts options, flags can be provided in "flags" field of array. For
1592
   *                        the "callback" filter, callable type should be passed. The
1593
   *                        callback must accept one argument, the value to be filtered, and return
1594
   *                        the value after filtering/sanitizing it.
1595
   *                        </p>
1596
   *                        <p>
1597
   *                        <code>
1598
   *                        // for filters that accept options, use this format
1599
   *                        $options = array(
1600
   *                        'options' => array(
1601
   *                        'default' => 3, // value to return if the filter fails
1602
   *                        // other options here
1603
   *                        'min_range' => 0
1604
   *                        ),
1605
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1606
   *                        );
1607
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1608
   *                        // for filter that only accept flags, you can pass them directly
1609
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1610
   *                        // for filter that only accept flags, you can also pass as an array
1611
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1612
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1613
   *                        // callback validate filter
1614
   *                        function foo($value)
1615
   *                        {
1616
   *                        // Expected format: Surname, GivenNames
1617
   *                        if (strpos($value, ", ") === false) return false;
1618
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1619
   *                        $empty = (empty($surname) || empty($givennames));
1620
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1621
   *                        if ($empty || $notstrings) {
1622
   *                        return false;
1623
   *                        } else {
1624
   *                        return $value;
1625
   *                        }
1626
   *                        }
1627
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1628
   *                        </code>
1629
   *                        </p>
1630
   *
1631
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1632
   */
1633 2
  public static function filter_var($variable, int $filter = FILTER_DEFAULT, $options = null)
  {
    // Only hand $options to the native function when the caller passed it explicitly.
    $filtered = (\func_num_args() < 3)
        ? \filter_var($variable, $filter)
        : \filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1643
1644
  /**
1645
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1646
   *
1647
   * Gets multiple variables and optionally filters them
1648
   *
1649
   * @link  http://php.net/manual/en/function.filter-var-array.php
1650
   *
1651
   * @param array $data       <p>
1652
   *                          An array with string keys containing the data to filter.
1653
   *                          </p>
1654
   * @param mixed $definition [optional] <p>
1655
   *                          An array defining the arguments. A valid key is a string
1656
   *                          containing a variable name and a valid value is either a
1657
   *                          filter type, or an
1658
   *                          array optionally specifying the filter, flags and options.
1659
   *                          If the value is an array, valid keys are filter
1660
   *                          which specifies the filter type,
1661
   *                          flags which specifies any flags that apply to the
1662
   *                          filter, and options which specifies any options that
1663
   *                          apply to the filter. See the example below for a better understanding.
1664
   *                          </p>
1665
   *                          <p>
1666
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1667
   *                          input array are filtered by this filter.
1668
   *                          </p>
1669
   * @param bool  $add_empty  [optional] <p>
1670
   *                          Add missing keys as <b>NULL</b> to the return value.
1671
   *                          </p>
1672
   *
1673
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. An
1674
   *               array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not set.
1675
   */
1676 2
  public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
  {
    // Pass $definition / $add_empty on to the native function only when the
    // caller supplied at least a definition; otherwise call it with $data alone.
    $filtered = (\func_num_args() < 2)
        ? \filter_var_array($data)
        : \filter_var_array($data, $definition, $add_empty);

    return self::filter($filtered);
  }
1686
1687
  /**
1688
   * Checks whether finfo is available on the server.
1689
   *
1690
   * @return bool
1691
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
1692
   */
1693
  public static function finfo_loaded(): bool
  {
    // The check is purely "does the finfo class exist".
    $finfoAvailable = \class_exists('finfo');

    return $finfoAvailable;
  }
1697
1698
  /**
1699
   * Returns the first $n characters of the string.
1700
   *
1701
   * @param string $str      <p>The input string.</p>
1702
   * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1703
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1704
   *
1705
   * @return string
1706
   */
1707 13
  public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n > 0) {
      $head = self::substr($str, 0, $n, $encoding);

      // self::substr() may report a failure as false; map that to ''.
      return ($head === false) ? '' : $head;
    }

    // Zero or negative length: nothing to return.
    return '';
  }
1720
1721
  /**
1722
   * Check if the number of unicode characters is not more than the specified integer.
1723
   *
1724
   * @param string $str      The original string to be checked.
1725
   * @param int    $box_size The size in number of chars to be checked against string.
1726
   *
1727
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1728
   */
1729 2
  public static function fits_inside(string $str, int $box_size): bool
  {
    // Length as reported by self::strlen() (called without an explicit encoding).
    $charCount = self::strlen($str);

    return ($charCount <= $box_size);
  }
1733
1734
  /**
   * Applies the case-folding replacement tables to a string.
   *
   * First the "common" table (self::$COMMON_CASE_FOLD) is applied, then,
   * if requested, the "full" table loaded from the "caseFolding_full" data file.
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
   * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
   *
   * @return string
   */
  private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
  {
    $upper = self::$COMMON_CASE_FOLD['upper'];
    $lower = self::$COMMON_CASE_FOLD['lower'];

    if ($useLower === true) {
      $str = (string)\str_replace($upper, $lower, $str);
    } else {
      $str = (string)\str_replace($lower, $upper, $str);
    }

    if ($fullCaseFold) {

      // loaded once per request and then reused
      static $FULL_CASE_FOLD = null;
      if ($FULL_CASE_FOLD === null) {
        $FULL_CASE_FOLD = self::getData('caseFolding_full');
      }

      // getData() returns false when the data file is missing: in that case
      // skip the full case folding instead of indexing into a non-array and
      // passing null to str_replace().
      if (
          \is_array($FULL_CASE_FOLD) === true
          &&
          isset($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1])
      ) {
        if ($useLower === true) {
          $str = (string)\str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        } else {
          $str = (string)\str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
        }
      }
    }

    return $str;
  }
1776
1777
  /**
1778
   * Try to fix simple broken UTF-8 strings.
1779
   *
1780
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1781
   *
1782
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1783
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1784
   * See: http://en.wikipedia.org/wiki/Windows-1252
1785
   *
1786
   * @param string $str <p>The input string</p>
1787
   *
1788
   * @return string
1789
   */
1790 42
  public static function fix_simple_utf8(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // search / replace lists, built once and then reused for every call
    static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;
    static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

    if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {

      $brokenUtf8Fix = self::$BROKEN_UTF8_FIX;
      if ($brokenUtf8Fix === null) {
        $brokenUtf8Fix = self::getData('utf8_fix');
      }

      // getData() returns false when the data file is missing: fall back to
      // an empty map, so that the property keeps its "null|array" contract and
      // array_keys() / array_values() never receive a non-array.
      if (\is_array($brokenUtf8Fix) === false) {
        $brokenUtf8Fix = [];
      }

      self::$BROKEN_UTF8_FIX = $brokenUtf8Fix;

      $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($brokenUtf8Fix);
      $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values($brokenUtf8Fix);
    }

    // with an empty map this returns the input unchanged
    return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
  }
1811
1812
  /**
1813
   * Fix a double (or multiple) encoded UTF8 string.
1814
   *
1815
   * @param string[]|string $str You can use a string or an array of strings.
1816
   *
1817
   * @return string[]|string
1818
   *                          Will return the fixed input-"array" or
1819
   *                          the fixed input-"string".
1820
   */
1821 2
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (recursively), keys are kept.
    if (\is_array($str) === true) {
      $fixed = [];
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    $current = (string)$str;
    $previous = '';

    // Repeat the decode / re-encode round trip until the string stops changing.
    while ($previous !== $current) {
      $previous = $current;
      $decoded = self::utf8_decode($current, true);
      $current = self::to_utf8($decoded);
    }

    return $current;
  }
1842
1843
  /**
   * Get the text direction of a specific character.
   *
   * Uses "IntlChar::charDirection()" when self::$SUPPORT['intlChar'] is true and it
   * returns one of the known direction codes, otherwise the code point of the
   * character is looked up in a built-in table of right-to-left code points.
   *
   * @param string $char
   *
   * @return string 'RTL' or 'LTR'
   */
  public static function getCharDirection(string $char): string
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
        return 'LTR';
      }

      if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
        return 'RTL';
      }

      // any other code: continue with the table lookup below
    }

    $codePoint = static::chr_to_decimal($char);

    // everything outside of 0x5be - 0x10b7f is treated as left-to-right
    if (!(0x5be <= $codePoint && 0x10b7f >= $codePoint)) {
      return 'LTR';
    }

    // Table entries: an int is one exact code point (compared strictly),
    // a two-element array is an inclusive [from, to] range.
    if (0x85e >= $codePoint) {

      $rtlTable = [
          0x5be,
          0x5c0,
          0x5c3,
          0x5c6,
          [0x5d0, 0x5ea],
          [0x5f0, 0x5f4],
          0x608,
          0x60b,
          0x60d,
          0x61b,
          [0x61e, 0x64a],
          [0x66d, 0x66f],
          [0x671, 0x6d5],
          [0x6e5, 0x6e6],
          [0x6ee, 0x6ef],
          [0x6fa, 0x70d],
          0x710,
          [0x712, 0x72f],
          [0x74d, 0x7a5],
          0x7b1,
          [0x7c0, 0x7ea],
          [0x7f4, 0x7f5],
          0x7fa,
          [0x800, 0x815],
          0x81a,
          0x824,
          0x828,
          [0x830, 0x83e],
          [0x840, 0x858],
          0x85e,
      ];

    } elseif (0x200f === $codePoint) {

      return 'RTL';

    } elseif (0xfb1d <= $codePoint) {

      $rtlTable = [
          0xfb1d,
          [0xfb1f, 0xfb28],
          [0xfb2a, 0xfb36],
          [0xfb38, 0xfb3c],
          0xfb3e,
          [0xfb40, 0xfb41],
          [0xfb43, 0xfb44],
          [0xfb46, 0xfbc1],
          [0xfbd3, 0xfd3d],
          [0xfd50, 0xfd8f],
          [0xfd92, 0xfdc7],
          [0xfdf0, 0xfdfc],
          [0xfe70, 0xfe74],
          [0xfe76, 0xfefc],
          [0x10800, 0x10805],
          0x10808,
          [0x1080a, 0x10835],
          [0x10837, 0x10838],
          0x1083c,
          [0x1083f, 0x10855],
          [0x10857, 0x1085f],
          [0x10900, 0x1091b],
          [0x10920, 0x10939],
          0x1093f,
          0x10a00,
          [0x10a10, 0x10a13],
          [0x10a15, 0x10a17],
          [0x10a19, 0x10a33],
          [0x10a40, 0x10a47],
          [0x10a50, 0x10a58],
          [0x10a60, 0x10a7f],
          [0x10b00, 0x10b35],
          [0x10b40, 0x10b55],
          [0x10b58, 0x10b72],
          [0x10b78, 0x10b7f],
      ];

    } else {

      // between the two tables (and not 0x200f)
      return 'LTR';

    }

    foreach ($rtlTable as $entry) {
      if (\is_array($entry)) {
        if ($entry[0] <= $codePoint && $entry[1] >= $codePoint) {
          return 'RTL';
        }
      } elseif ($entry === $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1965
1966
  /**
   * get data from "/data/*.php"
   *
   * @param string $file <p>Name of the data file, without the directory and without the ".php" extension.</p>
   *
   * @return mixed|false
   *                     The return value of the required file,
   *                     will return false if the file does not exist.
   */
  private static function getData(string $file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1983
1984
  /**
1985
   * Check for php-support.
1986
   *
1987
   * @param string|null $key
1988
   *
1989
   * @return mixed
1990
   *               Return the full support-"array", if $key === null<br>
1991
   *               return bool-value, if $key is used and available<br>
1992
   *               otherwise return <strong>null</strong>.
1993
   */
1994 26
  public static function getSupportInfo(string $key = null)
  {
    // make sure the support-"array" was filled
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key: full support-"array", otherwise the single entry (null if it is not set)
    return ($key === null)
        ? self::$SUPPORT
        : (self::$SUPPORT[$key] ?? null);
  }
2010
2011
  /**
   * Guess the file type of some (binary) content via its first two bytes.
   *
   * @param string $str <p>The content to inspect.</p>
   *
   * @return string[]
   *                  An array with the keys "ext" and "type":<br>
   *                  both are '' if the input has less than two bytes,<br>
   *                  both are '???' if the first two bytes are unknown,<br>
   *                  otherwise "ext" is the file extension and "type" is 'binary'.
   */
  private static function get_file_type($str)
  {
    if ('' === $str) {
      return ['ext' => '', 'type' => ''];
    }

    $signature = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($signature) !== 2) {
      return ['ext' => '', 'type' => ''];
    }

    // The two leading bytes are compared as raw bytes. Concatenating their
    // decimal values (e.g. 71 . 73 = 7173 for "GI") is ambiguous: the bytes
    // 0x07 0xAD (7 . 173) result in the same number and were detected as "gif".
    $knownSignatures = [
        '%P'       => 'pdf',  // 37, 80
        'MZ'       => 'exe',  // 77, 90
        'MT'       => 'midi', // 77, 84
        'PK'       => 'zip',  // 80, 75
        'Ra'       => 'rar',  // 82, 97
        "\xFF\xD8" => 'jpg',  // 255, 216
        'GI'       => 'gif',  // 71, 73
        'BM'       => 'bmp',  // 66, 77
        "\x89P"    => 'png',  // 137, 80
    ];

    if (isset($knownSignatures[$signature])) {
      return ['ext' => $knownSignatures[$signature], 'type' => 'binary'];
    }

    return ['ext' => '???', 'type' => '???'];
  }
2078
2079
  /**
2080
   * @param int    $length        <p>Length of the random string.</p>
2081
   * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2082
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2083
   *
2084
   * @return string
2085
   */
2086 1
  public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
  {
    $charCount = self::strlen($possibleChars, $encoding);

    // nothing to pick from
    if ($charCount === 0) {
      return '';
    }

    $result = '';

    // append one randomly picked char per iteration
    for ($picked = 0; $picked < $length; $picked++) {
      try {
        $index = \random_int(0, $charCount - 1);
      } catch (\Exception $e) {
        // random_int() failed: fall back to mt_rand()
        /** @noinspection RandomApiMigrationInspection */
        $index = \mt_rand(0, $charCount - 1);
      }

      $result .= self::substr($possibleChars, $index, 1, $encoding);
    }

    return $result;
  }
2112
2113
  /**
2114
   * @param string|int $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2115
   * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2116
   *
2117
   * @return string
2118
   */
2119 1
  public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
  {
    // prefix: random number + session id + remote / server address (if set) + extra entropy
    $prefixParts = [
        \mt_rand(),
        \session_id(),
        $_SERVER['REMOTE_ADDR'] ?? '',
        $_SERVER['SERVER_ADDR'] ?? '',
        $entropyExtra,
    ];
    $uniqueHelper = \implode('', $prefixParts);

    $uniqueString = \uniqid($uniqueHelper, true);

    if (!$md5) {
      return $uniqueString;
    }

    return \md5($uniqueString . $uniqueHelper);
  }
2135
2136
  /**
2137
   * alias for "UTF8::string_has_bom()"
2138
   *
2139
   * @see        UTF8::string_has_bom()
2140
   *
2141
   * @param string $str
2142
   *
2143
   * @return bool
2144
   *
2145
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2146
   */
2147 2
  public static function hasBom(string $str): bool
  {
    // deprecated alias: the work is done by "UTF8::string_has_bom()"
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2151
2152
  /**
2153
   * Returns true if the string contains a lower case char, false otherwise.
2154
   *
2155
   * @param string $str <p>The input string.</p>
2156
   *
2157
   * @return bool Whether or not the string contains a lower case character.
2158
   */
2159 47
  public static function has_lowercase(string $str): bool
  {
    // any chars, followed by one char from the POSIX class "lower"
    $pattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2163
2164
  /**
2165
   * Returns true if the string contains an upper case char, false otherwise.
2166
   *
2167
   * @param string $str <p>The input string.</p>
2168
   *
2169
   * @return bool Whether or not the string contains an upper case character.
2170
   */
2171 12
  public static function has_uppercase(string $str): bool
  {
    // any chars, followed by one char from the POSIX class "upper"
    $pattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $pattern);
  }
2175
2176
  /**
2177
   * Converts a hexadecimal-value into an UTF-8 character.
2178
   *
2179
   * @param string $hexdec <p>The hexadecimal value.</p>
2180
   *
2181
   * @return string|false One single UTF-8 character.
2182
   */
2183 4
  public static function hex_to_chr(string $hexdec)
  {
    // hexadecimal string -> number -> character
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2187
2188
  /**
2189
   * Converts hexadecimal U+xxxx code point representation to integer.
2190
   *
2191
   * INFO: opposite to UTF8::int_to_hex()
2192
   *
2193
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2194
   *
2195
   * @return int|false The code point, or false on failure.
2196
   */
2197 2
  public static function hex_to_int($hexDec)
  {
    // init
    $hexDec = (string)$hexDec;

    if ('' === $hexDec) {
      return false;
    }

    // Optional "\u" or "U+" prefix, followed by 4 to 6 hexadecimal digits.
    // Only [a-f0-9] is accepted: with [a-z0-9] a value like "zzzz" was
    // converted to 0 (and "abgh" to 0xab) instead of being reported as a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return \intval($match[1], 16);
    }

    return false;
  }
2212
2213
  /**
2214
   * alias for "UTF8::html_entity_decode()"
2215
   *
2216
   * @see UTF8::html_entity_decode()
2217
   *
2218
   * @param string $str
2219
   * @param int    $flags
2220
   * @param string $encoding
2221
   *
2222
   * @return string
2223
   */
2224 2
  public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // alias: all arguments are passed through unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2228
2229
  /**
2230
   * Converts a UTF-8 string to a series of HTML numbered entities.
2231
   *
2232
   * INFO: opposite to UTF8::html_decode()
2233
   *
2234
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2235
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2236
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2237
   *
2238
   * @return string HTML numbered entities.
2239
   */
2240 12
  public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
      // first code point that gets encoded: 0x80 keeps the ASCII chars as they are
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The convmap consists of groups of exactly four values
      // (start, end, offset, mask). A fifth element is invalid:
      // PHP >= 8.0 throws a ValueError if the size is not a multiple of 4.
      return \mb_encode_numericentity(
          $str,
          [$startCode, 0xfffff, 0, 0xfffff],
          $encoding
      );
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2282
2283
  /**
2284
   * UTF-8 version of html_entity_decode()
2285
   *
2286
   * The reason we are not using html_entity_decode() by itself is because
2287
   * while it is not technically correct to leave out the semicolon
2288
   * at the end of an entity most browsers will still interpret the entity
2289
   * correctly. html_entity_decode() does not convert entities without
2290
   * semicolons, so we are left with our own little solution here. Bummer.
2291
   *
2292
   * Convert all HTML entities to their applicable characters
2293
   *
2294
   * INFO: opposite to UTF8::html_encode()
2295
   *
2296
   * @link http://php.net/manual/en/function.html-entity-decode.php
2297
   *
2298
   * @param string $str      <p>
2299
   *                         The input string.
2300
   *                         </p>
2301
   * @param int    $flags    [optional] <p>
2302
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2303
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2304
   *                         <table>
2305
   *                         Available <i>flags</i> constants
2306
   *                         <tr valign="top">
2307
   *                         <td>Constant Name</td>
2308
   *                         <td>Description</td>
2309
   *                         </tr>
2310
   *                         <tr valign="top">
2311
   *                         <td><b>ENT_COMPAT</b></td>
2312
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2313
   *                         </tr>
2314
   *                         <tr valign="top">
2315
   *                         <td><b>ENT_QUOTES</b></td>
2316
   *                         <td>Will convert both double and single quotes.</td>
2317
   *                         </tr>
2318
   *                         <tr valign="top">
2319
   *                         <td><b>ENT_NOQUOTES</b></td>
2320
   *                         <td>Will leave both double and single quotes unconverted.</td>
2321
   *                         </tr>
2322
   *                         <tr valign="top">
2323
   *                         <td><b>ENT_HTML401</b></td>
2324
   *                         <td>
2325
   *                         Handle code as HTML 4.01.
2326
   *                         </td>
2327
   *                         </tr>
2328
   *                         <tr valign="top">
2329
   *                         <td><b>ENT_XML1</b></td>
2330
   *                         <td>
2331
   *                         Handle code as XML 1.
2332
   *                         </td>
2333
   *                         </tr>
2334
   *                         <tr valign="top">
2335
   *                         <td><b>ENT_XHTML</b></td>
2336
   *                         <td>
2337
   *                         Handle code as XHTML.
2338
   *                         </td>
2339
   *                         </tr>
2340
   *                         <tr valign="top">
2341
   *                         <td><b>ENT_HTML5</b></td>
2342
   *                         <td>
2343
   *                         Handle code as HTML 5.
2344
   *                         </td>
2345
   *                         </tr>
2346
   *                         </table>
2347
   *                         </p>
2348
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2349
   *
2350
   * @return string The decoded string.
2351
   */
2352 38
  public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
  {
    // Nothing to decode in an empty string.
    if ('' === $str) {
      return '';
    }

    // Strings shorter than four bytes are handed back untouched.
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Fast path: no ampersand at all, or neither a numeric-entity prefix ("&#")
    // nor a terminating semicolon is present.
    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // Fill self::$SUPPORT *before* its first read below; the original ran this
    // guard only after the "mbstring" flag had already been inspected.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Warn when an encoding outside of the three listed ones is requested
    // while mbstring is not available.
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Decode repeatedly until a pass no longer changes the string, so that
    // multi-level encoded entities are resolved as well.
    do {
      $str_compare = $str;

      # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
      if (self::$SUPPORT['mbstring'] === true) {

        $str = \mb_decode_numericentity(
            $str,
            [0x80, 0xfffff, 0, 0xfffff, 0],
            $encoding
        );

      } else {

        $str = (string)\preg_replace_callback(
            "/&#\d{2,6};/",
            function ($matches) use ($encoding) {
              // always fallback via symfony polyfill
              $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

              // quotes stay encoded here; the native call below handles them according to $flags
              if ($returnTmp !== '"' && $returnTmp !== "'") {
                return $returnTmp;
              }

              return $matches[0];
            },
            $str
        );

      }

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to unterminated numeric entities)
      $str = \html_entity_decode(
          \preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2440
2441
  /**
2442
   * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2443
   *
2444
   * @param string $str
2445
   * @param string $encoding [optional] <p>Default: UTF-8</p>
2446
   *
2447
   * @return string
2448
   */
2449 6
  public static function html_escape(string $str, string $encoding = 'UTF-8'): string
  {
    // convert both quote styles and substitute invalid code unit sequences
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $escapeFlags, $encoding);
  }
2457
2458
  /**
2459
   * Remove empty html-tag.
2460
   *
2461
   * e.g.: <tag></tag>
2462
   *
2463
   * @param string $str
2464
   *
2465
   * @return string
2466
   */
2467 1
  public static function html_stripe_empty_tags(string $str): string
  {
    // An "empty" tag pair: an opening tag, optional whitespace, a closing tag.
    $stripped = \preg_replace(
        "/<[^\/>]*>\s*<\/[^>]*>/iu",
        '',
        $str
    );

    // preg_replace() returns null on failure (e.g. $str is not valid UTF-8 while
    // the "u" modifier is set); return the input instead of silently wiping it.
    return $stripped ?? $str;
  }
2475
2476
  /**
2477
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2478
   *
2479
   * @link http://php.net/manual/en/function.htmlentities.php
2480
   *
2481
   * @param string $str           <p>
2482
   *                              The input string.
2483
   *                              </p>
2484
   * @param int    $flags         [optional] <p>
2485
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2486
   *                              invalid code unit sequences and the used document type. The default is
2487
   *                              ENT_COMPAT | ENT_HTML401.
2488
   *                              <table>
2489
   *                              Available <i>flags</i> constants
2490
   *                              <tr valign="top">
2491
   *                              <td>Constant Name</td>
2492
   *                              <td>Description</td>
2493
   *                              </tr>
2494
   *                              <tr valign="top">
2495
   *                              <td><b>ENT_COMPAT</b></td>
2496
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2497
   *                              </tr>
2498
   *                              <tr valign="top">
2499
   *                              <td><b>ENT_QUOTES</b></td>
2500
   *                              <td>Will convert both double and single quotes.</td>
2501
   *                              </tr>
2502
   *                              <tr valign="top">
2503
   *                              <td><b>ENT_NOQUOTES</b></td>
2504
   *                              <td>Will leave both double and single quotes unconverted.</td>
2505
   *                              </tr>
2506
   *                              <tr valign="top">
2507
   *                              <td><b>ENT_IGNORE</b></td>
2508
   *                              <td>
2509
   *                              Silently discard invalid code unit sequences instead of returning
2510
   *                              an empty string. Using this flag is discouraged as it
2511
   *                              may have security implications.
2512
   *                              </td>
2513
   *                              </tr>
2514
   *                              <tr valign="top">
2515
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2516
   *                              <td>
2517
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2518
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2519
   *                              </td>
2520
   *                              </tr>
2521
   *                              <tr valign="top">
2522
   *                              <td><b>ENT_DISALLOWED</b></td>
2523
   *                              <td>
2524
   *                              Replace invalid code points for the given document type with a
2525
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2526
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2527
   *                              instance, to ensure the well-formedness of XML documents with
2528
   *                              embedded external content.
2529
   *                              </td>
2530
   *                              </tr>
2531
   *                              <tr valign="top">
2532
   *                              <td><b>ENT_HTML401</b></td>
2533
   *                              <td>
2534
   *                              Handle code as HTML 4.01.
2535
   *                              </td>
2536
   *                              </tr>
2537
   *                              <tr valign="top">
2538
   *                              <td><b>ENT_XML1</b></td>
2539
   *                              <td>
2540
   *                              Handle code as XML 1.
2541
   *                              </td>
2542
   *                              </tr>
2543
   *                              <tr valign="top">
2544
   *                              <td><b>ENT_XHTML</b></td>
2545
   *                              <td>
2546
   *                              Handle code as XHTML.
2547
   *                              </td>
2548
   *                              </tr>
2549
   *                              <tr valign="top">
2550
   *                              <td><b>ENT_HTML5</b></td>
2551
   *                              <td>
2552
   *                              Handle code as HTML 5.
2553
   *                              </td>
2554
   *                              </tr>
2555
   *                              </table>
2556
   *                              </p>
2557
   * @param string $encoding      [optional] <p>
2558
   *                              Like <b>htmlspecialchars</b>,
2559
   *                              <b>htmlentities</b> takes an optional third argument
2560
   *                              <i>encoding</i> which defines encoding used in
2561
   *                              conversion.
2562
   *                              Although this argument is technically optional, you are highly
2563
   *                              encouraged to specify the correct value for your code.
2564
   *                              </p>
2565
   * @param bool   $double_encode [optional] <p>
2566
   *                              When <i>double_encode</i> is turned off PHP will not
2567
   *                              encode existing html entities. The default is to convert everything.
2568
   *                              </p>
2569
   *
2570
   *
2571
   * @return string The encoded string.
2572
   * </p>
2573
   * <p>
2574
   * If the input <i>string</i> contains an invalid code unit
2575
   * sequence within the given <i>encoding</i> an empty string
2576
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2577
   * <b>ENT_SUBSTITUTE</b> flags are set.
2578
   */
2579 9
  public static function htmlentities(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // the two most common encoding names skip the normalization step
    $needsNormalizing = ($encoding !== 'UTF-8' && $encoding !== 'CP850');
    if ($needsNormalizing) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * The native function leaves backslashes untouched, so they are replaced
     * by their numeric html entity here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = \str_replace('\\', '&#92;', $encoded);

    return self::html_encode($encoded, true, $encoding);
  }
2599
2600
  /**
2601
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2602
   *
2603
   * INFO: Take a look at "UTF8::htmlentities()"
2604
   *
2605
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2606
   *
2607
   * @param string $str           <p>
2608
   *                              The string being converted.
2609
   *                              </p>
2610
   * @param int    $flags         [optional] <p>
2611
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2612
   *                              invalid code unit sequences and the used document type. The default is
2613
   *                              ENT_COMPAT | ENT_HTML401.
2614
   *                              <table>
2615
   *                              Available <i>flags</i> constants
2616
   *                              <tr valign="top">
2617
   *                              <td>Constant Name</td>
2618
   *                              <td>Description</td>
2619
   *                              </tr>
2620
   *                              <tr valign="top">
2621
   *                              <td><b>ENT_COMPAT</b></td>
2622
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2623
   *                              </tr>
2624
   *                              <tr valign="top">
2625
   *                              <td><b>ENT_QUOTES</b></td>
2626
   *                              <td>Will convert both double and single quotes.</td>
2627
   *                              </tr>
2628
   *                              <tr valign="top">
2629
   *                              <td><b>ENT_NOQUOTES</b></td>
2630
   *                              <td>Will leave both double and single quotes unconverted.</td>
2631
   *                              </tr>
2632
   *                              <tr valign="top">
2633
   *                              <td><b>ENT_IGNORE</b></td>
2634
   *                              <td>
2635
   *                              Silently discard invalid code unit sequences instead of returning
2636
   *                              an empty string. Using this flag is discouraged as it
2637
   *                              may have security implications.
2638
   *                              </td>
2639
   *                              </tr>
2640
   *                              <tr valign="top">
2641
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2642
   *                              <td>
2643
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2644
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2645
   *                              </td>
2646
   *                              </tr>
2647
   *                              <tr valign="top">
2648
   *                              <td><b>ENT_DISALLOWED</b></td>
2649
   *                              <td>
2650
   *                              Replace invalid code points for the given document type with a
2651
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2652
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2653
   *                              instance, to ensure the well-formedness of XML documents with
2654
   *                              embedded external content.
2655
   *                              </td>
2656
   *                              </tr>
2657
   *                              <tr valign="top">
2658
   *                              <td><b>ENT_HTML401</b></td>
2659
   *                              <td>
2660
   *                              Handle code as HTML 4.01.
2661
   *                              </td>
2662
   *                              </tr>
2663
   *                              <tr valign="top">
2664
   *                              <td><b>ENT_XML1</b></td>
2665
   *                              <td>
2666
   *                              Handle code as XML 1.
2667
   *                              </td>
2668
   *                              </tr>
2669
   *                              <tr valign="top">
2670
   *                              <td><b>ENT_XHTML</b></td>
2671
   *                              <td>
2672
   *                              Handle code as XHTML.
2673
   *                              </td>
2674
   *                              </tr>
2675
   *                              <tr valign="top">
2676
   *                              <td><b>ENT_HTML5</b></td>
2677
   *                              <td>
2678
   *                              Handle code as HTML 5.
2679
   *                              </td>
2680
   *                              </tr>
2681
   *                              </table>
2682
   *                              </p>
2683
   * @param string $encoding      [optional] <p>
2684
   *                              Defines encoding used in conversion.
2685
   *                              </p>
2686
   *                              <p>
2687
   *                              For the purposes of this function, the encodings
2688
   *                              ISO-8859-1, ISO-8859-15,
2689
   *                              UTF-8, cp866,
2690
   *                              cp1251, cp1252, and
2691
   *                              KOI8-R are effectively equivalent, provided the
2692
   *                              <i>string</i> itself is valid for the encoding, as
2693
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2694
   *                              the same positions in all of these encodings.
2695
   *                              </p>
2696
   * @param bool   $double_encode [optional] <p>
2697
   *                              When <i>double_encode</i> is turned off PHP will not
2698
   *                              encode existing html entities, the default is to convert everything.
2699
   *                              </p>
2700
   *
2701
   * @return string The converted string.
2702
   * </p>
2703
   * <p>
2704
   * If the input <i>string</i> contains an invalid code unit
2705
   * sequence within the given <i>encoding</i> an empty string
2706
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2707
   * <b>ENT_SUBSTITUTE</b> flags are set.
2708
   */
2709 8
  public static function htmlspecialchars(string $str, int $flags = ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
  {
    // only encoding names other than the two common ones get normalized
    if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2717
2718
  /**
2719
   * Checks whether iconv is available on the server.
2720
   *
2721
   * @return bool
2722
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2723
   */
2724
  public static function iconv_loaded(): bool
  {
    // extension_loaded() already yields a boolean
    return \extension_loaded('iconv');
  }
2728
2729
  /**
2730
   * alias for "UTF8::decimal_to_chr()"
2731
   *
2732
   * @see UTF8::decimal_to_chr()
2733
   *
2734
   * @param mixed $int
2735
   *
2736
   * @return string
2737
   */
2738 4
  public static function int_to_chr($int): string
  {
    // plain alias: delegate to the canonical method
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2742
2743
  /**
2744
   * Converts Integer to hexadecimal U+xxxx code point representation.
2745
   *
2746
   * INFO: opposite to UTF8::hex_to_int()
2747
   *
2748
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2749
   * @param string $pfix [optional]
2750
   *
2751
   * @return string The code point: $pfix followed by the hex value, left-padded with zeros to at least four digits.
2752
   */
2753 6
  public static function int_to_hex(int $int, string $pfix = 'U+'): string
  {
    // left-pad with zeros to at least four hex digits; longer values stay as they are
    $hexDigits = \str_pad(\dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $hexDigits;
  }
2761
2762
  /**
2763
   * Checks whether intl-char is available on the server.
2764
   *
2765
   * @return bool
2766
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2767
   */
2768
  public static function intlChar_loaded(): bool
  {
    // the check is based on the presence of the "IntlChar" class
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
  }
2772
2773
  /**
2774
   * Checks whether intl is available on the server.
2775
   *
2776
   * @return bool
2777
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
2778
   */
2779 5
  public static function intl_loaded(): bool
  {
    // the check is based on the presence of the "intl" extension
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
  }
2783
2784
  /**
2785
   * alias for "UTF8::is_ascii()"
2786
   *
2787
   * @see        UTF8::is_ascii()
2788
   *
2789
   * @param string $str
2790
   *
2791
   * @return bool
2792
   *
2793
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2794
   */
2795 2
  public static function isAscii(string $str): bool
  {
    // deprecated alias: delegate to the snake_case method
    $result = self::is_ascii($str);

    return $result;
  }
2799
2800
  /**
2801
   * alias for "UTF8::is_base64()"
2802
   *
2803
   * @see        UTF8::is_base64()
2804
   *
2805
   * @param string $str
2806
   *
2807
   * @return bool
2808
   *
2809
   * @deprecated <p>use "UTF8::is_base64()"</p>
2810
   */
2811 2
  public static function isBase64($str): bool
  {
    // deprecated alias: delegate to the snake_case method
    $result = self::is_base64($str);

    return $result;
  }
2815
2816
  /**
2817
   * alias for "UTF8::is_binary()"
2818
   *
2819
   * @see        UTF8::is_binary()
2820
   *
2821
   * @param mixed $str
2822
   * @param bool  $strict
2823
   *
2824
   * @return bool
2825
   *
2826
   * @deprecated <p>use "UTF8::is_binary()"</p>
2827
   */
2828 4
  public static function isBinary($str, $strict = false): bool
  {
    // deprecated alias: both arguments are passed through unchanged
    $result = self::is_binary($str, $strict);

    return $result;
  }
2832
2833
  /**
2834
   * alias for "UTF8::is_bom()"
2835
   *
2836
   * @see        UTF8::is_bom()
2837
   *
2838
   * @param string $utf8_chr
2839
   *
2840
   * @return bool
2841
   *
2842
   * @deprecated <p>use "UTF8::is_bom()"</p>
2843
   */
2844 2
  public static function isBom(string $utf8_chr): bool
  {
    // deprecated alias: delegate to the snake_case method
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2848
2849
  /**
2850
   * alias for "UTF8::is_html()"
2851
   *
2852
   * @see        UTF8::is_html()
2853
   *
2854
   * @param string $str
2855
   *
2856
   * @return bool
2857
   *
2858
   * @deprecated <p>use "UTF8::is_html()"</p>
2859
   */
2860 2
  public static function isHtml(string $str): bool
  {
    // deprecated alias: delegate to the snake_case method
    $result = self::is_html($str);

    return $result;
  }
2864
2865
  /**
2866
   * alias for "UTF8::is_json()"
2867
   *
2868
   * @see        UTF8::is_json()
2869
   *
2870
   * @param string $str
2871
   *
2872
   * @return bool
2873
   *
2874
   * @deprecated <p>use "UTF8::is_json()"</p>
2875
   */
2876
  public static function isJson(string $str): bool
  {
    // deprecated alias: delegate to the snake_case method
    $result = self::is_json($str);

    return $result;
  }
2880
2881
  /**
2882
   * alias for "UTF8::is_utf16()"
2883
   *
2884
   * @see        UTF8::is_utf16()
2885
   *
2886
   * @param mixed $str
2887
   *
2888
   * @return int|false
2889
   *                    <strong>false</strong> if it's not UTF-16,<br>
2890
   *                    <strong>1</strong> for UTF-16LE,<br>
2891
   *                    <strong>2</strong> for UTF-16BE.
2892
   *
2893
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2894
   */
2895 2
  public static function isUtf16($str)
  {
    // deprecated alias: the result of is_utf16() is returned as-is
    $result = self::is_utf16($str);

    return $result;
  }
2899
2900
  /**
2901
   * alias for "UTF8::is_utf32()"
2902
   *
2903
   * @see        UTF8::is_utf32()
2904
   *
2905
   * @param mixed $str
2906
   *
2907
   * @return int|false
2908
   *                   <strong>false</strong> if it's not UTF-32,
2909
   *                   <strong>1</strong> for UTF-32LE,
2910
   *                   <strong>2</strong> for UTF-32BE.
2911
   *
2912
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2913
   */
2914 2
  public static function isUtf32($str)
  {
    // deprecated alias: the result of is_utf32() is returned as-is
    $result = self::is_utf32($str);

    return $result;
  }
2918
2919
  /**
2920
   * alias for "UTF8::is_utf8()"
2921
   *
2922
   * @see        UTF8::is_utf8()
2923
   *
2924
   * @param string $str
2925
   * @param bool   $strict
2926
   *
2927
   * @return bool
2928
   *
2929
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2930
   */
2931 17
  public static function isUtf8($str, $strict = false): bool
  {
    // deprecated alias: both arguments are passed through unchanged
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2935
2936
  /**
2937
   * Returns true if the string contains only alphabetic chars, false otherwise.
2938
   *
2939
   * @param string $str
2940
   *
2941
   * @return bool
2942
   *               Whether or not $str contains only alphabetic chars.
2943
   */
2944 10
  public static function is_alpha(string $str): bool
  {
    // zero or more characters of the POSIX "alpha" class, anchored at both ends
    $alphaOnly = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $alphaOnly);
  }
2948
2949
  /**
2950
   * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2951
   *
2952
   * @param string $str
2953
   *
2954
   * @return bool
2955
   *               Whether or not $str contains only alphanumeric chars.
2956
   */
2957 13
  public static function is_alphanumeric(string $str): bool
  {
    // zero or more characters of the POSIX "alnum" class, anchored at both ends
    $alnumOnly = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $alnumOnly);
  }
2961
2962
  /**
2963
   * Checks if a string is 7 bit ASCII.
2964
   *
2965
   * @param string $str <p>The string to check.</p>
2966
   *
2967
   * @return bool
2968
   *              <strong>true</strong> if it is ASCII<br>
2969
   *              <strong>false</strong> otherwise
2970
   *
2971
   */
2972 201
  public static function is_ascii(string $str): bool
  {
    // An empty string counts as ASCII; otherwise no byte outside of the
    // allowed set (listed control bytes plus 0x20-0x7E) may occur.
    // NOTE(review): "\x10" and "\x13" look like decimal 10 / 13 written as hex - confirm the intent.
    return '' === $str
           ||
           !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
  }
2980
2981
  /**
2982
   * Returns true if the string is base64 encoded, false otherwise.
2983
   *
2984
   * @param string $str <p>The input string.</p>
2985
   *
2986
   * @return bool Whether or not $str is base64 encoded.
2987
   */
2988 9
  public static function is_base64($str): bool
  {
    // $str is untyped: anything that is not a non-empty string is rejected.
    if (\is_string($str) === false || '' === $str) {
      return false;
    }

    // strict mode: base64_decode() returns false for input outside of the base64 alphabet
    $decoded = \base64_decode($str, true);

    // Compare explicitly against false / '' - a plain truthiness test would
    // also reject the valid payload "0" (encoded as "MA==").
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // only the canonical encoding of the decoded payload is accepted
    return \base64_encode($decoded) === $str;
  }
3002
3003
  /**
3004
   * Check if the input is binary... (this looks like a hack).
3005
   *
3006
   * @param mixed $input
3007
   * @param bool  $strict
3008
   *
3009
   * @return bool
3010
   */
3011 40
  public static function is_binary($input, bool $strict = false): bool
  {
    $input = (string)$input;

    // an empty input is never treated as binary
    if ($input === '') {
      return false;
    }

    // a string made up of "0" and "1" characters only counts as binary
    if (\preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // strict mode: additionally consult the encoding reported by ext-fileinfo
    if ($strict === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      $mimeEncodingInfo = new \finfo(FILEINFO_MIME_ENCODING);
      if ($mimeEncodingInfo->buffer($input) === 'binary') {
        return true;
      }

    }

    // the detected file type may already say "binary"
    $fileType = self::get_file_type($input);
    if ($fileType['type'] === 'binary') {
      return true;
    }

    $byteLength = self::strlen_in_byte($input);
    if (!$byteLength) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // last check: the share of NUL bytes in the input must exceed 0.256
    $nullByteCount = self::substr_count_in_byte($input, "\x0", 0, $byteLength);

    return ($nullByteCount / $byteLength) > 0.256;
  }
3060
3061
  /**
3062
   * Check if the file is binary.
3063
   *
3064
   * @param string $file
3065
   *
3066
   * @return bool
3067
   */
3068 6
  public static function is_binary_file($file): bool
  {
    // only the first 512 bytes of the file are inspected
    $firstBytes = '';

    $handle = \fopen($file, 'rb');
    if (\is_resource($handle)) {
      $firstBytes = \fread($handle, 512);
      \fclose($handle);
    }

    // nothing was read (file could not be opened or is empty): not binary
    return $firstBytes !== '' && self::is_binary($firstBytes, true);
  }
3085
3086
  /**
3087
   * Returns true if the string contains only whitespace chars, false otherwise.
3088
   *
3089
   * @param string $str
3090
   *
3091
   * @return bool
3092
   *               Whether or not $str contains only whitespace characters.
3093
   */
3094 15
  public static function is_blank(string $str): bool
  {
    // zero or more characters of the POSIX "space" class, anchored at both ends
    $whitespaceOnly = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $whitespaceOnly);
  }
3098
3099
  /**
3100
   * Checks if the given string is equal to any "Byte Order Mark".
3101
   *
3102
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3103
   *
3104
   * @param string $str <p>The input string.</p>
3105
   *
3106
   * @return bool
3107
   *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.
3108
   */
3109 2
  public static function is_bom($str): bool
  {
    // the known BOM strings are the keys of self::$BOM; the match must be exact (type and value)
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
  }
3119
3120
  /**
3121
   * Determine whether the string is considered to be empty.
3122
   *
3123
   * A variable is considered empty if it does not exist or if its value equals FALSE.
3124
   * empty() does not generate a warning if the variable does not exist.
3125
   *
3126
   * @param mixed $str
3127
   *
3128
   * @return bool Whether or not $str is empty().
3129
   */
3130
  public static function is_empty($str): bool
  {
    // for a variable that is set, empty() is the same as a negated boolean cast
    return !$str;
  }
3134
3135
  /**
3136
   * Returns true if the string contains only hexadecimal chars, false otherwise.
3137
   *
3138
   * @param string $str
3139
   *
3140
   * @return bool
3141
   *               Whether or not $str contains only hexadecimal chars.
3142
   */
3143 13
  public static function is_hexadecimal(string $str): bool
  {
    // zero or more characters of the POSIX "xdigit" class, anchored at both ends
    $hexOnly = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $hexOnly);
  }
3147
3148
  /**
3149
   * Check if the string contains any html-tags <lall>.
3150
   *
3151
   * @param string $str <p>The input string.</p>
3152
   *
3153
   * @return bool
3154
   */
3155 3
  public static function is_html(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    // one opening, closing or self-closing tag (with optional attributes) is enough
    $tagFound = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    // preg_match() gives 1 on a match, 0 on no match and false on error
    return $tagFound === 1;
  }
3168
3169
  /**
3170
   * Try to check if "$str" is a JSON string.
3171
   *
3172
   * @param string $str <p>The input string.</p>
3173
   *
3174
   * @return bool
3175
   */
3176 22
  public static function is_json(string $str): bool
  {
    if ($str === '') {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // only objects and arrays qualify; scalar results are rejected
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
      return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === JSON_ERROR_NONE;
  }
3201
3202
  /**
3203
   * Returns true if the string contains only lower case chars, false otherwise.
   *
   * @param string $str
3204
   *
3205
   * @return bool
3206
   */
3207 8
  public static function is_lowercase(string $str): bool
  {
    // zero or more characters of the POSIX "lower" class, anchored at both ends
    $lowerOnly = '^[[:lower:]]*$';

    return (bool)self::str_matches_pattern($str, $lowerOnly);
  }
3215
3216
  /**
3217
   * Returns true if the string is serialized, false otherwise.
3218
   *
3219
   * @param string $str
3220
   *
3221
   * @return bool Whether or not $str is serialized.
3222
   */
3223 7
  public static function is_serialized(string $str): bool
  {
    if ('' === $str) {
      return false;
    }

    // A serialized "false" also unserializes to false, so it is checked up front.
    if ($str === 'b:0;') {
      return true;
    }

    // SECURITY: $str may come from an untrusted source. "allowed_classes => false"
    // stops unserialize() from instantiating any class: serialized objects come back
    // as __PHP_Incomplete_Class, which still differs from false, so the result of
    // this check stays the same.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
  }
3235
3236
  /**
   * Returns true if the string contains only upper case chars, false
   * otherwise.
   *
   * The check is delegated to self::str_matches_pattern() with the
   * pattern "^[[:upper:]]*$".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *               Whether or not $str contains only upper case characters.
   */
  public static function is_uppercase(string $str): bool
  {
    return self::str_matches_pattern($str, '^[[:upper:]]*$');
  }
3249
3250
  /**
   * Check if the string is UTF-16.
   *
   * The input is converted UTF-16 -> UTF-8 -> UTF-16 -> UTF-8 once per byte order.
   * A byte order only scores if that round trip is stable; the byte order with the
   * higher score wins, equal scores mean "not UTF-16".
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Result of self::count_chars() for the raw input: computed lazily, shared by both passes.
    $inputChars = [];
    $scores = [];

    foreach (['UTF-16LE', 'UTF-16BE'] as $byteOrder) {
      $scores[$byteOrder] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      // One point per key of the round-tripped table that in_array() finds in the input table.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $inputChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
3322
3323
  /**
   * Check if the string is UTF-32.
   *
   * The input is converted UTF-32 -> UTF-8 -> UTF-32 -> UTF-8 once per byte order.
   * A byte order only scores if that round trip is stable; the byte order with the
   * higher score wins, equal scores mean "not UTF-32".
   *
   * @param mixed $str <p>The input string.</p>
   *
   * @return int|false
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    $str = (string)$str;

    if (self::is_binary($str) === false) {
      return false;
    }

    if (self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Result of self::count_chars() for the raw input: computed lazily, shared by both passes.
    $inputChars = [];
    $scores = [];

    foreach (['UTF-32LE', 'UTF-32BE'] as $byteOrder) {
      $scores[$byteOrder] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      if (\count($inputChars) === 0) {
        $inputChars = self::count_chars($str, true);
      }

      // One point per key of the round-tripped table that in_array() finds in the input table.
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (\in_array($char, $inputChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
3395
3396
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * Arrays are checked element by element (recursively); the result is false as soon as
   * one element fails. Non-string scalars are cast to string before they are inspected.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string|string[] $str    <p>The string to be checked.</p>
   * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, bool $strict = false): bool
  {
    if (\is_array($str) === true) {
      foreach ($str as $v) {
        if (false === self::is_utf8($v, $strict)) {
          return false;
        }
      }

      return true;
    }

    // Cast once: the byte loop below indexes into $str, which only works reliably on a real string.
    $str = (string)$str;

    if ('' === $str) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this shortcut relies on the /u modifier but is taken when
    // pcre_utf8_support() is NOT true - confirm that the condition is intended.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (\preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    $len = self::strlen_in_byte($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      // Lookup of the current byte in the "ord" table.
      $in = self::$ORD[$str[$i]];
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Current octet is neither in the US-ASCII range nor a legal first
          // octet of a multi-octet sequence.
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $mUcs4 |= ($in & 0x0000003F) << $shift;
          // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
          // Unicode code point to be output.
          if (0 === --$mState) {
            // Check for illegal sequences and code points.
            //
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080)
                ||
                (3 === $mBytes && $mUcs4 < 0x0800)
                ||
                (4 === $mBytes && $mUcs4 < 0x10000)
                ||
                (4 < $mBytes)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }
            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          // ((0xC0 & (*in) != 0x80) && (mState != 0))
          // Incomplete multi-octet sequence.
          return false;
        }
      }
    }

    return true;
  }
3553
3554
  /**
   * Decodes a JSON string.
   *
   * The input is passed through self::filter() first, the result is then handed
   * to PHP's native json_decode().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The <i>json</i> string being decoded.</p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, passed on unchanged to
   *                        the native function.
   *                        </p>
   *
   * @throws \RuntimeException if JSON support is flagged as missing.
   *
   * @return mixed
   *                The value encoded in <i>json</i> in appropriate PHP type, exactly as
   *                returned by the native json_decode() (<b>NULL</b> if the <i>json</i>
   *                cannot be decoded or is nested deeper than the recursion limit).
   */
  public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
  {
    // Filtering happens before the support check.
    $filtered = self::filter($json);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_decode($filtered, $assoc, $depth, $options);
  }
3606
3607
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through self::filter() first, the result is then handed
   * to PHP's native json_encode().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>),
   *                       passed on unchanged to the native function.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       </p>
   *
   * @throws \RuntimeException if JSON support is flagged as missing.
   *
   * @return string|false
   *                      A JSON encoded <strong>string</strong> on success or<br>
   *                      <strong>FALSE</strong> on failure.
   */
  public static function json_encode($value, int $options = 0, int $depth = 512)
  {
    // Filtering happens before the support check.
    $filtered = self::filter($value);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
      throw new \RuntimeException('ext-json: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_encode($filtered, $options, $depth);
  }
3662
3663
  /**
   * Checks whether JSON is available on the server.
   *
   * "Available" means that the function "json_decode" exists.
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise
   */
  public static function json_loaded(): bool
  {
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
  }
3673
3674
  /**
   * Makes string's first char lowercase.
   *
   * The string is split into its first character and the remainder; only the
   * first character is passed through self::strtolower().
   *
   * @param string $str       <p>The input string</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Everything after the first character; a "false" result is treated as "no remainder".
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    return self::strtolower($head, $encoding, $cleanUtf8) . ($tail === false ? '' : $tail);
  }
3698
3699
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Forwarded to lcfirst().</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded to lcfirst().</p>
   *
   * @return string
   */
  public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3714
3715
  /**
   * Lowercase the first char of all words in the string.
   *
   * The string is split via self::str_to_words(); every non-empty part that is not
   * listed in $exceptions is passed through self::lcfirst(), then the parts are
   * joined again without a separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Compare against '' explicitly: a loose check would treat the string "0" as empty.
    if ('' === $str) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // Skip empty parts only; "0" is a real word and must be kept.
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
3764
3765
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Forwarded to lcfirst().</p>
   * @param bool   $cleanUtf8 [optional] <p>Forwarded to lcfirst().</p>
   *
   * @return string
   */
  public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    $result = self::lcfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
3780
3781
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without $chars (INF, null, false, an empty string or an empty array) the
   * pattern "^[\pZ\pC]+" is used, i.e. leading chars of the Unicode categories
   * "separator" (Z) and "other" (C) are removed. Any other value - including "0" -
   * is cast to string, quoted for the regex and used as the set of chars to strip.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars <p>Optional characters to be stripped</p>
   *
   * @return string The string with unwanted characters stripped from the left.
   */
  public static function ltrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // Do not use a loose "!$chars" here: it would treat "0" as "nothing given"
    // and silently fall back to whitespace stripping.
    $useDefaultChars = $chars === INF
                       ||
                       $chars === null
                       ||
                       $chars === false
                       ||
                       $chars === []
                       ||
                       (string)$chars === '';

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      $pattern = '^[\pZ\pC]+';
    } else {
      $pattern = '^[' . \preg_quote((string)$chars, '/') . ']+';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
3805
3806
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is joined into one string before the code points are collected.
   *
   * @param string|array<string> $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character with the highest code point, null if no code points were found.
   */
  public static function max($arg)
  {
    $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($haystack, false);

    return \count($codepoints) === 0 ? null : self::chr(\max($codepoints));
  }
3828
3829
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * The per-character sizes come from self::chr_size_list(); an empty list yields 0.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int Max byte lengths of the given chars.
   */
  public static function max_chr_width(string $str): int
  {
    $sizes = self::chr_size_list($str);

    if (\count($sizes) === 0) {
      return 0;
    }

    return (int)\max($sizes);
  }
3846
3847
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the mbstring internal encoding
   * is switched to "UTF-8".
   *
   * @return bool
   *              <strong>true</strong> if available, <strong>false</strong> otherwise.
   */
  public static function mbstring_loaded(): bool
  {
    if (\extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3863
3864
  /**
   * Checks whether mbstring "overloaded" is active on the server.
   *
   * Active means: the constant MB_OVERLOAD_STRING exists and its bit is set in
   * the INI directive 'mbstring.func_overload'.
   *
   * @return bool
   */
  private static function mbstring_overloaded(): bool
  {
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // Without the constant there is nothing to test against.
    if (\defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    // ini_get() returns a string (or false): cast explicitly instead of relying on
    // error suppression and an implicit conversion inside the bitwise operation.
    /** @noinspection PhpComposerExtensionStubsInspection */
    return ((int)\ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING) !== 0;
  }
3881
3882
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array argument is joined into one string before the code points are collected.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string|null The character with the lowest code point, null if no code points were found.
   */
  public static function min($arg)
  {
    $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($haystack, false);

    return \count($codepoints) === 0 ? null : self::chr(\min($codepoints));
  }
3904
3905
  /**
   * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see        UTF8::normalize_encoding()
   *
   * @param mixed $encoding <p>The encoding name to normalize.</p>
   * @param mixed $fallback <p>Forwarded to normalize_encoding().</p>
   *
   * @return mixed
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = '')
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3921
3922
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty / "0" / "1" input -> $fallback; the literals "UTF-8" / "UTF8" and
   * "8BIT" / "BINARY" -> fixed result; per-request cache; exact match in the list of known
   * encodings; finally an alias table that is matched against the upper-cased name with
   * all punctuation removed. Unknown names are returned upper-cased.
   *
   * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
   */
  public static function normalize_encoding($encoding, $fallback = '')
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string)$encoding;

    if (
        !$encoding
        ||
        $encoding === '1' // only a fallback, for non "strict_types" usage ...
        ||
        $encoding === '0' // only a fallback, for non "strict_types" usage ...
    ) {
      return $fallback;
    }

    if (
        'UTF-8' === $encoding
        ||
        'UTF8' === $encoding
    ) {
      return 'UTF-8';
    }

    if (
        '8BIT' === $encoding
        ||
        'BINARY' === $encoding
    ) {
      return 'CP850';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
      self::$ENCODINGS = self::getData('encodings');
    }

    // The loaded data may be "false" (flagged by static analysis for getData()),
    // so only search it when it really is an array.
    if (
        \is_array(self::$ENCODINGS)
        &&
        \in_array($encoding, self::$ENCODINGS, true)
    ) {
      $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = \strtoupper($encoding);
    // Alias lookup key: upper-cased name without punctuation (letters, digits and whitespace are kept).
    $encodingUpperHelper = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // Cache under the name as it was passed in, so the next call with the same spelling is a direct hit.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
4055
4056
  /**
   * Standardize line ending to unix-like.
   *
   * Every "\r\n" pair and every remaining single "\r" becomes "\n".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function normalize_line_ending(string $str): string
  {
    // strtr() tries the longest key first, so "\r\n" collapses into a single "\n".
    return \strtr($str, ["\r\n" => "\n", "\r" => "\n"]);
  }
4067
4068
  /**
   * Normalize some MS Word special characters.
   *
   * The replacement table is loaded once (data key "utf8_msword"); its keys are the
   * search strings and its values the replacements. Both lists are cached in static
   * variables for the rest of the request.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($UTF8_MSWORD_KEYS_CACHE === null) {

      if (self::$UTF8_MSWORD === null) {
        self::$UTF8_MSWORD = self::getData('utf8_msword');
      }

      // The loaded data may be "false" (flagged by static analysis for getData());
      // fall back to an empty table so that nothing is replaced instead of failing.
      $table = \is_array(self::$UTF8_MSWORD) ? self::$UTF8_MSWORD : [];

      $UTF8_MSWORD_KEYS_CACHE = \array_keys($table);
      $UTF8_MSWORD_VALUES_CACHE = \array_values($table);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
  }
4096
4097
  /**
   * Replace the special whitespace characters from the whitespace-table with a normal space.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep the bidirectional text chars,
   *                                        otherwise they are removed from the string.</p>
   *
   * @return string
   */
  public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
  {
    if ('' === $str) {
      return '';
    }

    // one prepared search-list per "keep non-breaking-space" mode (key: 0 or 1)
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // the bidi controls are removed, not replaced by a space
      $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
4139
4140
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string $chr      <p>The character of which to calculate code point.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   */
  public static function ord($chr, string $encoding = 'UTF-8'): int
  {
    static $CHAR_CACHE = [];

    $chr = (string)$chr;

    // the cache-key is built from the input as it was passed in
    $chrOriginal = $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
      }
    }

    $cacheKey = $chrOriginal . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      /** @noinspection PhpComposerExtensionStubsInspection */
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // manual decoding: look at (up to) the first four bytes, "unpack()" is 1-indexed
    $bytes = \unpack('C*', (string)self::substr($chr, 0, 4, 'CP850'));
    $code = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $code && isset($bytes[4])) {
      // 4-byte sequence
      $code = (int)((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    } elseif (0xE0 <= $code && isset($bytes[3])) {
      // 3-byte sequence
      $code = (int)((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    } elseif (0xC0 <= $code && isset($bytes[2])) {
      // 2-byte sequence
      $code = (int)((($code - 0xC0) << 6) + $bytes[2] - 0x80);
    }

    return $CHAR_CACHE[$cacheKey] = $code;
  }
4209
4210
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never sets variables in the current scope,
   *          the result is only written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str       <p>The input string.</p>
   * @param array  $result    <p>The result will be returned into this reference parameter.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool
   *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result.
   */
  public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // without "mbstring" -> use the native parser
    if (self::$SUPPORT['mbstring'] !== true) {
      /** @noinspection PhpVoidFunctionResultUsedInspection */
      \parse_str($str, $result);

      return !empty($result);
    }

    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
4246
4247
  /**
   * Checks if the "u" modifier is available, which enables Unicode support in PCRE.
   *
   * @return bool
   *              <strong>true</strong> if support is available,<br>
   *              <strong>false</strong> otherwise.
   */
  public static function pcre_utf8_support(): bool
  {
    // an empty pattern with the "u" modifier only matches if PCRE accepts the modifier
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matchResult = @\preg_match('//u', '');

    return (bool)$matchResult;
  }
4259
4260
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point like this:
   * - digits only             => used as decimal code point
   * - hexadecimal digits only => converted via "UTF8::hex_to_int()"
   * - everything else         => code point of the character via "UTF8::ord()"
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @throws \RuntimeException if "ext-ctype" is not available
   *
   * @return string[]
   *                  The characters of the range,<br>
   *                  or an empty array if a boundary is empty or resolves to the code point 0.
   */
  public static function range($var1, $var2): array
  {
    if (!$var1 || !$var2) {
      return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
      throw new \RuntimeException('ext-ctype: is not installed');
    }

    // Resolve one boundary to its code point. The ctype checks always get a string,
    // passing e.g. an integer to "ctype_xdigit()" is interpreted as ASCII value
    // (and is deprecated since PHP 8.1).
    $toCodePoint = static function ($boundary): int {
      $boundaryAsString = (string)$boundary;

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_digit($boundaryAsString)) {
        return (int)$boundary;
      }

      /** @noinspection PhpComposerExtensionStubsInspection */
      if (\ctype_xdigit($boundaryAsString)) {
        return (int)self::hex_to_int($boundaryAsString);
      }

      return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
      return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
      return [];
    }

    return \array_map(
        [
            self::class,
            'chr',
        ],
        \range($start, $end)
    );
  }
4316
4317
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // turn "%uXXXX" sequences into hexadecimal html entities, they are decoded in the loop below
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapePattern, $str)) {
      $str = (string)\preg_replace($unicodeEscapePattern, '&#x\\1;', \rawurldecode($str));
    }

    $flags = ENT_QUOTES | ENT_HTML5;

    do {
      $strBeforeDecoding = $str;

      $strAsUtf8 = self::to_utf8($str);
      $strWithoutEntities = self::html_entity_decode($strAsUtf8, $flags);
      $str = self::fix_simple_utf8(\rawurldecode($strWithoutEntities));

      // repeat until a pass changes nothing anymore (only if multi-decoding is requested)
    } while ($multi_decode === true && $strBeforeDecoding !== $str);

    return $str;
  }
4365
4366
  /**
   * Filter an array of strings and return the remaining strings as a re-indexed list.
   *
   * @param array    $strings           <p>The strings to filter.</p>
   * @param bool     $removeEmptyValues <p>Set to true, to drop strings which are empty after "trim()".</p>
   * @param int|null $removeShortValues <p>If not null, strings with a length less than or equal to this value
   *                                    are dropped.</p>
   *
   * @return array
   */
  private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
  {
    $keep = static function ($str) use ($removeEmptyValues, $removeShortValues): bool {
      if ($removeShortValues !== null && self::strlen($str) <= $removeShortValues) {
        return false;
      }

      return !($removeEmptyValues === true && \trim($str) === '');
    };

    // "array_filter()" keeps the keys, so the result needs to be re-indexed
    return \array_values(\array_filter($strings, $keep));
  }
4400
4401
  /**
   * Replaces all occurrences of $pattern in $str by $replacement.
   *
   * INFO: the "u" (UTF-8) modifier is always added to the pattern.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $pattern     <p>The regular expression pattern, without delimiters.</p>
   * @param string $replacement <p>The string to replace with.</p>
   * @param string $options     [optional] <p>Matching conditions to be used, "msr" is handled as "ms".</p>
   * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
   *
   * @return string
   */
  public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
  {
    $modifiers = ($options === 'msr') ? 'ms' : $options;

    // fallback for an empty delimiter
    $regexDelimiter = $delimiter ?: '/';

    $regex = $regexDelimiter . $pattern . $regexDelimiter . 'u' . $modifiers;

    return (string)\preg_replace($regex, $replacement, $str);
  }
4431
4432
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see        UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::remove_bom()".</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM(string $str): string
  {
    $strWithoutBom = self::remove_bom($str);

    return $strWithoutBom;
  }
4447
4448
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the BOM-table is checked one after another, so a BOM
   * that shows up after an earlier one was removed is stripped as well.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string String without UTF-BOM.
   */
  public static function remove_bom(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // only a BOM at the very beginning of the string is of interest
      if (0 !== self::strpos_in_byte($str, $bomString, 0)) {
        continue;
      }

      $strWithoutBom = self::substr_in_byte($str, $bomByteLength, $remainingBytes);
      if ($strWithoutBom === false) {
        return '';
      }

      $remainingBytes -= $bomByteLength;
      $str = (string)$strWithoutBom;
    }

    return $str;
  }
4477
4478
  /**
   * Collapses consecutive repetitions of a string in another string into one occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String (or strings) to search for in the base string.</p>
   *
   * @return string The result string with removed duplicates.
   */
  public static function remove_duplicates(string $str, $what = ' '): string
  {
    $needles = \is_string($what) === true ? [$what] : $what;

    // anything that is neither a string nor an array leaves the input untouched
    if (\is_array($needles) !== true) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      $repeatedNeedlePattern = '/(' . \preg_quote($needle, '/') . ')+/';
      $str = (string)\preg_replace($repeatedNeedlePattern, $needle, $str);
    }

    return $str;
  }
4501
4502
  /**
   * Remove html via "strip_tags()" from the string.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "<b><i>".
   *                              Default: '' (strip all tags)</p>
   *
   * @return string
   */
  public static function remove_html(string $str, string $allowableTags = ''): string
  {
    $strWithoutTags = \strip_tags($str, $allowableTags);

    return $strWithoutTags;
  }
4516
4517
  /**
   * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
   *
   * INFO: "\r\n" counts as one break, so it is replaced by exactly one $replacement.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $replacement [optional] <p>Default is an empty string.</p>
   *
   * @return string
   */
  public static function remove_html_breaks(string $str, string $replacement = ''): string
  {
    // The alternation starts directly with "\r\n": a leading "/" in the pattern would
    // belong to the first alternative and therefore would remove a slash in front of
    // a line break, while a plain "\r\n" would be replaced twice (as "\r" and as "\n").
    return (string)\preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
  }
4529
4530
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Set to true, to also remove the url encoded form
   *                            (e.g. "%00") of these characters.</p>
   * @param string $replacement [optional] <p>The replacement for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
  {
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // the hex digits of a percent-encoding are case-insensitive ("%0b" === "%0B")
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is found anymore, because removing one sequence
    // can build a new one out of the surrounding characters (e.g. "%%0000")
    do {
      $str = (string)\preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4563
4564
  /**
   * Returns a new string with the prefix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The prefix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String without the prefix $substring.
   */
  public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_starts_with($str, $substring)) {
      return $str;
    }

    // everything behind the prefix is kept
    $prefixLength = self::strlen($substring, $encoding);

    return (string)self::substr($str, $prefixLength, null, $encoding);
  }
4587
4588
  /**
   * Returns a new string with the suffix $substring removed, if present.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The suffix to remove.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string String having a $str without the suffix $substring.
   */
  public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
  {
    if (!self::str_ends_with($str, $substring)) {
      return $str;
    }

    // "strlen()" can return false, so the lengths are cast before the subtraction
    $lengthWithoutSuffix = (int)self::strlen($str, $encoding) - (int)self::strlen($substring, $encoding);

    // The lengths are measured in $encoding, so the string must be cut in the same
    // encoding (like "remove_left()" does), otherwise "substr()" uses its default.
    return (string)self::substr(
        $str,
        0,
        $lengthWithoutSuffix,
        $encoding
    );
  }
4610
4611
  /**
   * Replaces all occurrences of $search in $str by $replacement.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $search        <p>The needle to search for.</p>
   * @param string $replacement   <p>The string to replace with.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
  {
    // case-sensitive -> "str_replace()", case-insensitive -> "str_ireplace()"
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4629
4630
  /**
   * Replaces all occurrences of the $search elements in $str by $replacement.
   *
   * @param string       $str           <p>The input string.</p>
   * @param array        $search        <p>The elements to search for.</p>
   * @param string|array $replacement   <p>The string (or strings) to replace with.</p>
   * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   *
   * @return string String after the replacements.
   */
  public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
  {
    // case-sensitive -> "str_replace()", case-insensitive -> "str_ireplace()"
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
  }
4648
4649
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character, an empty string removes the chars.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
  {
    if ('' === $str) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        // if there is no native support for "mbstring",
        // then we need to clean the string before ...
        $str = self::clean($str);
      }

      // "mb_substitute_character()" only accepts a code point or "none" / "long" / "entity",
      // a replacement string like "?" is rejected (warning in PHP 7, ValueError in PHP 8).
      // So invalid bytes are converted into U+FFFD first, and U+FFFD is replaced by the
      // requested replacement at the end of this method.
      $substituteCharacter = $replacementChar === '' ? 'none' : 0xFFFD;

      // always fallback via symfony polyfill
      $save = \mb_substitute_character();
      \mb_substitute_character($substituteCharacter);
      try {
        $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      } finally {
        // never leave the global substitute character changed
        \mb_substitute_character($save);
      }

      $str = \is_string($strTmp) ? $strTmp : '';
    }

    return \str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacementChar,
            $replacementChar,
        ],
        $str
    );
  }
4705
4706
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param mixed  $chars [optional] <p>Characters to be stripped. If nothing (INF, null, false,
   *                      an empty string or an empty array) is given, then separator and
   *                      control characters (\pZ, \pC) are stripped.</p>
   *
   * @return string The string with unwanted characters stripped from the right.
   */
  public static function rtrim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // A plain truthiness check would also treat "0" / 0 as "no chars given", and then
    // e.g. rtrim('100', '0') would not strip anything, so "nothing given" is checked explicitly.
    $noCharsGiven = $chars === INF
                    || $chars === null
                    || $chars === false
                    || $chars === ''
                    || $chars === [];

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($noCharsGiven) {
      $pattern = "[\pZ\pC]+\$";
    } else {
      $chars = \preg_quote((string)$chars, '/');
      $pattern = "[$chars]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
4730
4731
  /**
   * Build a regex fragment which matches the given characters.
   *
   * The elements of the split string are collected like this:
   * - "-" is put in front of the character class
   * - elements of up to two bytes are added (quoted) to the character class
   * - longer elements with a length of one character are added to the character class as they are
   * - all other elements become alternatives next to the character class
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string <p>"[...]" or "(?:[...]|...)"; the results are cached per $s + $class.</p>
   */
  private static function rxClass(string $s, string $class = ''): string
  {
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 holds the content of the character class, the rest are alternatives
    $parts = [$class];

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        $parts[0] .= \preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $regexFragment = 1 === \count($parts)
        ? $parts[0]
        : '(?:' . \implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $regexFragment;

    return $regexFragment;
  }
4779
4780
  /**
   * WARNING: Prints the detected native UTF-8 support (libs) as html, e.g. for debugging.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // one "key - value" line per support-entry, wrapped into a <pre> block
    $output = '<pre>';
    foreach (self::$SUPPORT as $supportKey => $supportValue) {
      $output .= $supportKey . ' - ' . \print_r($supportValue, true) . "\n<br>";
    }
    $output .= '</pre>';

    echo $output;
  }
4795
4796
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The HTML numbered entity.
   */
  public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
  {
    if ('' === $char) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // "UTF-8" and "CP850" are used as they are, everything else is normalized first
    $encodingNeedsNormalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
    if ($encodingNeedsNormalizing) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4825
4826
  /**
   * Replace every run of $tabLength spaces with one tab.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces which are replaced by one tab. Default: 4</p>
   *
   * @return string
   */
  public static function spaces_to_tabs(string $str, int $tabLength = 4): string
  {
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
  }
4836
4837
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string|int $str       <p>The string to split into array.</p>
   * @param int        $length    [optional] <p>Max character length of each array element.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] An array containing chunks of the string.
   */
  public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
  {
    $str = (string)$str;

    if ('' === $str) {
      return [];
    }

    $chars = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // let PCRE do the work: "." + "u" modifier matches one UTF-8 character
      \preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk through the bytes and collect the UTF-8 sequences by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // a continuation byte looks like "10xxxxxx"
      $isContinuationByte = static function (string $byte): bool {
        return ($byte & "\xC0") === "\x80";
      };

      $byteCount = self::strlen_in_byte($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {

        $leadByte = $str[$pos];

        if (($leadByte & "\x80") === "\x00") {

          // 1 byte: "0xxxxxxx"
          $chars[] = $leadByte;

        } elseif (isset($str[$pos + 1]) && ($leadByte & "\xE0") === "\xC0") {

          // 2 bytes: "110xxxxx 10xxxxxx"
          if ($isContinuationByte($str[$pos + 1])) {
            $chars[] = $leadByte . $str[$pos + 1];

            $pos++;
          }

        } elseif (isset($str[$pos + 2]) && ($leadByte & "\xF0") === "\xE0") {

          // 3 bytes: "1110xxxx 10xxxxxx 10xxxxxx"
          if (
              $isContinuationByte($str[$pos + 1])
              &&
              $isContinuationByte($str[$pos + 2])
          ) {
            $chars[] = $leadByte . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }

        } elseif (isset($str[$pos + 3]) && ($leadByte & "\xF8") === "\xF0") {

          // 4 bytes: "11110xxx 10xxxxxx 10xxxxxx 10xxxxxx"
          if (
              $isContinuationByte($str[$pos + 1])
              &&
              $isContinuationByte($str[$pos + 2])
              &&
              $isContinuationByte($str[$pos + 3])
          ) {
            $chars[] = $leadByte . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue the single characters together into chunks of $length characters
      return \array_map(
          static function ($chunk) {
            return \implode('', $chunk);
          },
          \array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return [];
    }

    return $chars;
  }
4957
4958
  /**
   * Returns a camelCase version of the string. Trims surrounding spaces,
   * capitalizes letters following digits, spaces, dashes and underscores,
   * and removes spaces, dashes, as well as underscores.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    $trimmed = self::trim($str);
    $str = self::lcfirst($trimmed, $encoding);

    // leading dashes / underscores are dropped without capitalizing what follows
    $str = (string)\preg_replace('/^[-_]+/', '', $str);

    // separators are removed and the character behind them (if any) is upper-cased
    $upperCaseAfterSeparator = function ($match) use ($encoding) {
      return isset($match[1]) ? UTF8::strtoupper($match[1], $encoding) : '';
    };
    $str = (string)\preg_replace_callback('/[-_\s]+(.)?/u', $upperCaseAfterSeparator, $str);

    // digits are kept and the character behind them (if any) is upper-cased
    $upperCaseAfterDigits = function ($match) use ($encoding) {
      return UTF8::strtoupper($match[0], $encoding);
    };

    return (string)\preg_replace_callback('/[\d]+(.)?/u', $upperCaseAfterDigits, $str);
  }
4995
4996
  /**
   * Capitalizes the first letter of each word of a personal name, except for
   * words the helper treats as name particles that must stay lower-case.
   *
   * Whitespace is collapsed first, then the name is processed once per
   * delimiter: first split on spaces, then on dashes.
   *
   * @param string $str
   *
   * @return string String with $str capitalized.
   */
  public static function str_capitalize_name(string $str): string
  {
    $result = self::collapse_whitespace($str);

    foreach ([' ', '-'] as $delimiter) {
      $result = self::str_capitalize_name_helper($result, $delimiter);
    }

    return $result;
  }
5013
5014
  /**
   * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
   *
   * Splits $names on $delimiter and upper-cases the first character of every
   * part, unless the part is one of the known lower-case name particles or
   * starts with one of the known prefixes. When splitting on "-", parts that
   * merely start with a particle are left untouched as well.
   *
   * @param string $names     <p>The (partial) name to process.</p>
   * @param string $delimiter <p>The delimiter used to split and re-join the parts.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
  {
    // init
    $namesArray = \explode($delimiter, $names);

    if ($namesArray === false) {
      return '';
    }

    $specialCases = [
        'names'    => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // loop-invariant: the list of beginnings that block capitalization
    $blockingBeginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
      $blockingBeginnings = \array_merge($specialCases['names'], $blockingBeginnings);
    }

    foreach ($namesArray as &$name) {
      if (\in_array($name, $specialCases['names'], true)) {
        continue;
      }

      $skip = false;
      foreach ($blockingBeginnings as $beginning) {
        if (self::strpos($name, $beginning, 0, $encoding) === 0) {
          $skip = true;

          // one hit is enough, no need to test the remaining beginnings
          break;
        }
      }

      if ($skip) {
        continue;
      }

      $name = self::str_upper_first($name);
    }
    // break the reference so later writes to $name cannot alter the last element
    unset($name);

    return \implode($delimiter, $namesArray);
  }
5102
5103
  /**
   * Tells whether $haystack contains $needle. The lookup is case-sensitive
   * unless $caseSensitive is false. An empty haystack or needle never matches.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param string $needle        <p>Substring to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains $needle.
   */
  public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    // (a non-boolean third argument is taken as the encoding)
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
  }
5132
5133
  /**
   * Tells whether $haystack contains every entry of $needles. The lookup is
   * case-sensitive unless $caseSensitive is false. An empty haystack or an
   * empty needle list yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not $haystack contains all $needles.
   */
  public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if ($haystack === '' || empty($needles)) {
      return false;
    }

    // only a fallback to prevent BC in the api ...
    // (a non-boolean third argument is taken as the encoding)
    if (!\is_bool($caseSensitive)) {
      $encoding = (string)$caseSensitive;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if (!$found) {
        return false;
      }
    }

    return true;
  }
5168
5169
  /**
   * Tells whether $haystack contains at least one entry of $needles. The
   * lookup is case-sensitive unless $caseSensitive is false. An empty needle
   * list yields false.
   *
   * @param string $haystack      <p>The input string.</p>
   * @param array  $needles       <p>SubStrings to look for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool
   *               Whether or not $haystack contains any of the $needles.
   */
  public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
  {
    if (empty($needles)) {
      return false;
    }

    foreach ($needles as $candidate) {
      $found = self::str_contains($haystack, $candidate, $caseSensitive, $encoding);
      if ($found) {
        return true;
      }
    }

    return false;
  }
5196
5197
  /**
   * Returns a lowercase and trimmed string separated by dashes.
   *
   * Thin wrapper: delegates to str_delimit() with "-" as the delimiter.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
  {
    $dashed = self::str_delimit($str, '-', $encoding);

    return $dashed;
  }
5211
5212
  /**
   * Returns a lowercase and trimmed string separated by the given delimiter.
   * Delimiters are inserted before ASCII uppercase characters (A-Z) that are
   * not at a word boundary, and in place of runs of spaces, dashes and
   * underscores. The delimiter itself is inserted after lower-casing, so alpha
   * delimiters are not converted to lowercase.
   *
   * The delimiter is always inserted literally: sequences such as "$0" or "\1"
   * inside $delimiter are NOT interpreted as regex backreferences.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $delimiter <p>Sequence used to separate parts of the string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_delimit(string $str, string $delimiter, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // mark inner uppercase letters with a dash, the dash is normalized below
    $str = (string)\preg_replace('/\B([A-Z])/u', '-\1', $str);

    $str = self::strtolower($str, $encoding);

    // use a callback instead of a replacement string, so that "$" and "\"
    // in the delimiter are never parsed as backreferences
    return (string)\preg_replace_callback(
        '/[-_\s]+/u',
        static function () use ($delimiter) {
          return $delimiter;
        },
        $str
    );
  }
5234
5235
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection order: binary check (UTF-16 / UTF-32), ASCII, UTF-8,
   * "mb_detect_encoding()" with a fixed candidate list, and finally an
   * "iconv()" round-trip over the known encodings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false e.g. for BINARY or not detected encoding.
   */
  public static function str_detect_encoding($str)
  {
    // init
    $str = (string)$str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {

      // run each check only once and reuse the result
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

      // is binary but not "UTF-16" or "UTF-32"
      return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'CP932',
        'CP936',
        'CP950',
        'CP866',
        'CP850',
        'CP51932',
        'CP50220',
        'CP50221',
        'CP50222',
        'ISO-2022-JP',
        'ISO-2022-KR',
        'JIS',
        'JIS-ms',
        'EUC-CN',
        'EUC-JP',
    ];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      // info: do not use the symfony polyfill here
      $encoding = \mb_detect_encoding($str, $detectOrder, true);
      if ($encoding) {
        return $encoding;
      }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
      // getData() may return false; never store a non-array in the cache
      $loadedEncodings = self::getData('encodings');
      self::$ENCODINGS = \is_array($loadedEncodings) ? $loadedEncodings : [];
    }

    // guard against a non-array value stored elsewhere, foreach needs an array
    $candidates = \is_array(self::$ENCODINGS) ? self::$ENCODINGS : [];

    foreach ($candidates as $encodingTmp) {
      # INFO: //IGNORE but still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
5361
5362
  /**
   * Check if the string ends with the given substring (byte-wise, case-sensitive).
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    $needleBytes = \strlen($needle);

    // a needle longer than the haystack can never be its suffix
    if ($needleBytes > \strlen($haystack)) {
      return false;
    }

    $tail = \substr($haystack, -$needleBytes);

    return $tail === $needle;
  }
5378
5379
  /**
   * Tells whether the string ends with at least one of $substrings.
   *
   * - case-sensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_ends_with_any(string $str, array $substrings): bool
  {
    if (empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_ends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5403
5404
  /**
   * Makes sure the string begins with $substring: the substring is prepended
   * when str_starts_with() reports that it is not already there.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_left(string $str, string $substring): string
  {
    return self::str_starts_with($str, $substring)
        ? $str
        : $substring . $str;
  }
5421
5422
  /**
   * Makes sure the string ends with $substring: the substring is appended
   * when str_ends_with() reports that it is not already there.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>The substring to add if not present.</p>
   *
   * @return string
   */
  public static function str_ensure_right(string $str, string $substring): string
  {
    return self::str_ends_with($str, $substring)
        ? $str
        : $str . $substring;
  }
5438
5439
  /**
   * Makes an identifier more readable: every "_id" is removed, remaining
   * underscores become spaces, the result is trimmed and its first character
   * is upper-cased.
   *
   * @param string $str
   *
   * @return string
   */
  public static function str_humanize($str): string
  {
    // order matters: "_id" has to be removed before the plain "_" is replaced
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $humanized = self::str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    $humanized = self::trim($humanized);

    return self::ucfirst($humanized);
  }
5463
5464
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * The tail of the haystack is cut by the character count of the needle (not
   * by its byte count), because upper- and lower-case variants of the same
   * character can have different byte lengths in UTF-8. If the character count
   * is not available, the byte length is used as a fallback.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    $needleLength = self::strlen($needle);

    if ($needleLength === false || $needleLength < 1) {
      // fallback: byte-wise slice, as good as we can get without a char count
      $tail = \substr($haystack, -\strlen($needle));
    } else {
      $tail = self::substr($haystack, -$needleLength);
    }

    if ($tail === false) {
      return false;
    }

    return self::strcasecmp((string)$tail, $needle) === 0;
  }
5484
5485
  /**
   * Tells whether the string ends with at least one of $substrings.
   *
   * - case-insensitive
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str ends with one of the $substrings.
   */
  public static function str_iends_with_any(string $str, array $substrings): bool
  {
    if (empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_iends_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5509
5510
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * Thin wrapper: delegates to stripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                    The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
  }
5532
5533
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * Thin wrapper: delegates to strripos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
  }
5556
5557
  /**
   * Returns the index of the first occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search.
   *
   * Thin wrapper: delegates to strpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
  }
5579
5580
  /**
   * Returns the index of the last occurrence of $needle in the string,
   * and false if not found. Accepts an optional offset from which to begin
   * the search. Offsets may be negative to count from the last character
   * in the string.
   *
   * Thin wrapper: delegates to strrpos().
   *
   * @param string $str      <p>The input string.</p>
   * @param string $needle   <p>Substring to look for.</p>
   * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return int|false
   *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.
   */
  public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
  {
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
  }
5603
5604
  /**
   * Inserts $substring into the string at the $index provided. The string is
   * returned unchanged when $index is greater than its length.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $substring <p>String to be inserted.</p>
   * @param int    $index     <p>The index at which to insert the substring.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
  {
    $length = self::strlen($str, $encoding);

    if ($index <= $length) {
      // cut the string at $index and glue the new part in between
      $head = self::substr($str, 0, $index, $encoding);
      $tail = self::substr($str, $index, $length, $encoding);

      return $head . $substring . $tail;
    }

    return $str;
  }
5627
5628
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Implemented via "preg_replace()" with quoted, case-insensitive unicode
   * patterns. Replacement values are escaped, so "$1" / "\1" in $replace are
   * inserted literally (as with the native "str_ireplace()") instead of being
   * treated as regex backreferences. An empty search string never matches.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), used literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one pattern per search value
    $patterns = [];
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // pattern that can never match: an empty needle must not replace anything
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
      }
    }

    // escape "\" and "$" so that preg_replace() does not parse backreferences
    $escape = static function ($value): string {
      return \strtr((string)$value, ['\\' => '\\\\', '$' => '\\$']);
    };
    $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    $replaced = 0;
    $result = \preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $result;
  }
5671
5672
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with(string $haystack, string $needle): bool
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a match at position 0 means the haystack begins with the needle
    return self::stripos($haystack, $needle) === 0;
  }
5692
5693
  /**
   * Tells whether the string begins with at least one of $substrings.
   *
   * - case-insensitive
   *
   * @param string $str        <p>The input string.</p>
   * @param array  $substrings <p>Substrings to look for.</p>
   *
   * @return bool Whether or not $str starts with one of the $substrings.
   */
  public static function str_istarts_with_any(string $str, array $substrings): bool
  {
    if ($str === '' || empty($substrings)) {
      return false;
    }

    foreach ($substrings as $candidate) {
      $matches = self::str_istarts_with($str, $candidate);
      if ($matches) {
        return true;
      }
    }

    return false;
  }
5721
5722
  /**
   * Gets the substring after the first occurrence of a separator.
   *
   * The separator is located via str_iindex_first(), which delegates to
   * stripos(). An empty string is returned when $str or $separator is empty
   * or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // use the same encoding for the lookup and for the cut below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5753
5754
  /**
   * Gets the substring after the last occurrence of a separator.
   *
   * The separator is located via str_iindex_last(), which delegates to
   * strripos(). An empty string is returned when $str or $separator is empty
   * or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // use the same encoding for the lookup and for the cut below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
5785
5786
  /**
   * Gets the substring before the first occurrence of a separator.
   *
   * The separator is located via str_iindex_first(), which delegates to
   * stripos(). An empty string is returned when $str or $separator is empty
   * or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // use the same encoding for the lookup and for the cut below
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5812
5813
  /**
   * Gets the substring before the last occurrence of a separator.
   *
   * The separator is located via str_iindex_last(), which delegates to
   * strripos(). An empty string is returned when $str or $separator is empty
   * or when the separator is not found.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $separator <p>The string separator.</p>
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    if ($separator === '' || $str === '') {
      return '';
    }

    // use the same encoding for the lookup and for the cut below
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
      return '';
    }

    return (string)self::substr($str, 0, $offset, $encoding);
  }
5839
5840
  /**
   * Gets the part of the string relative to the first occurrence of "$needle",
   * as returned by stristr() (the part before the needle when "$beforeNeedle"
   * is true).
   *
   * An empty string is returned when $str or $needle is empty or when
   * stristr() reports no match.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($needle === '' || $str === '') {
      return '';
    }

    $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
5872
5873
  /**
   * Returns the portion of the string found by strrichr() for the last
   * occurrence of "$needle": the part after it by default, or the part
   * before it when "$beforeNeedle" is true.
   *
   * An empty string is returned when the input or the needle is empty, or
   * when the needle is not found.
   *
   * @param string $str          <p>The input string.</p>
   * @param string $needle       <p>The string to look for.</p>
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
  }
5900
5901
  /**
   * Returns the trailing $n characters of the string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $n        <p>Number of characters to retrieve from the end; values below 1 yield ''.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
  {
    if ($n < 1) {
      return '';
    }

    // A negative start offset counts from the end of the string.
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
      return '';
    }

    return $tail;
  }
5920
5921
  /**
   * Limit the number of characters in a string.
   *
   * If the string is longer than "$length", it is cut so that the kept part
   * plus "$strAddOn" is "$length" characters long. When the add-on alone is
   * already as long as (or longer than) "$length", only the add-on is returned.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if ((int)self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Measure the add-on in the same encoding as the input, and never let the
    // kept length go negative: a negative substr length would cut from the end
    // of the string instead and could yield a result longer than the input.
    $keep = \max(0, $length - (int)self::strlen($strAddOn, $encoding));

    return (string)self::substr($str, 0, $keep, $encoding) . $strAddOn;
  }
5947
5948
  /**
   * Limit the number of characters in a string without cutting inside a word:
   * the string is cut back to the last space within the first "$length"
   * characters before "$strAddOn" is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Default: 100</p>
   * @param string $strAddOn [optional] <p>Default: …</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string
   */
  public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
  {
    if ('' === $str || $length <= 0) {
      return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    $lastKeptIndex = $length - 1;

    // The limit falls exactly on a space: cut right before it.
    if (self::substr($str, $lastKeptIndex, 1, $encoding) === ' ') {
      return self::substr($str, 0, $lastKeptIndex, $encoding) . $strAddOn;
    }

    // Otherwise drop the (possibly incomplete) last word of the truncated text.
    $truncated = (string)self::substr($str, 0, $length, $encoding);
    $words = \explode(' ', $truncated);
    \array_pop($words);
    $withoutLastWord = \implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // No word boundary at all: fall back to a hard cut.
    return self::substr($truncated, 0, $lastKeptIndex, $encoding) . $strAddOn;
  }
5989
5990
  /**
   * Returns the longest run of leading characters shared by the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the overlap of both strings can be part of a common prefix.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $prefix = '';
    $pos = 0;
    while ($pos < $limit) {
      $current = self::substr($str, $pos, 1, $encoding);

      if ($current != self::substr($otherStr, $pos, 1, $encoding)) {
        break;
      }

      $prefix .= $current;
      ++$pos;
    }

    return $prefix;
  }
6016
6017
  /**
   * Returns the longest common substring between the string and $otherStr.
   * In the case of ties, it returns that which occurs first in $str.
   *
   * Uses dynamic programming, see
   * http://en.wikipedia.org/wiki/Longest_common_substring_problem
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string The longest common substring, or '' if either string is empty
   *                or nothing is shared.
   */
  public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    $strLength = (int)self::strlen($str, $encoding);
    $otherLength = (int)self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
      return '';
    }

    // Extract the characters of $otherStr once instead of once per table cell.
    $otherChars = [];
    for ($j = 0; $j < $otherLength; $j++) {
      $otherChars[$j] = self::substr($otherStr, $j, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // 1-based position in $str just after the best match

    // Only the previous table row is ever read, so two rows are sufficient.
    $previousRow = \array_fill(0, $otherLength + 1, 0);

    for ($i = 1; $i <= $strLength; $i++) {
      // Loop-invariant for the inner loop, therefore extracted here.
      $strChar = self::substr($str, $i - 1, 1, $encoding);
      $currentRow = [0];

      for ($j = 1; $j <= $otherLength; $j++) {
        if ($strChar === $otherChars[$j - 1]) {
          $currentRow[$j] = $previousRow[$j - 1] + 1;

          // Strictly greater keeps the first of several equally long matches.
          if ($currentRow[$j] > $len) {
            $len = $currentRow[$j];
            $end = $i;
          }
        } else {
          $currentRow[$j] = 0;
        }
      }

      $previousRow = $currentRow;
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return ($returnTmp === false ? '' : $returnTmp);
  }
6068
6069
  /**
   * Returns the longest run of trailing characters shared by the string and $otherStr.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $otherStr <p>Second string for comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string
   */
  public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
  {
    // Only the overlap of both strings can be part of a common suffix.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $suffix = '';
    $fromEnd = 1;
    while ($fromEnd <= $limit) {
      // Negative offsets address characters counted from the end.
      $current = self::substr($str, -$fromEnd, 1, $encoding);

      if ($current != self::substr($otherStr, -$fromEnd, 1, $encoding)) {
        break;
      }

      $suffix = $current . $suffix;
      ++$fromEnd;
    }

    return $suffix;
  }
6095
6096
  /**
   * Tests the string against a regular expression.
   *
   * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
   *
   * @return bool Whether or not $str matches the pattern.
   */
  public static function str_matches_pattern(string $str, string $pattern): bool
  {
    // preg_match() yields 1 on a match, 0 on no match and false on error.
    return \preg_match('/' . $pattern . '/u', $str) === 1;
  }
6112
6113
  /**
   * Tells whether a character exists at the given index. Negative offsets
   * are counted from the last character of the string. Implements part of
   * the ArrayAccess interface.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $offset   <p>The index to check.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return bool Whether or not the index exists.
   */
  public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
  {
    $charCount = self::strlen($str, $encoding);

    return $offset < 0
        ? $charCount >= \abs($offset)
        : $charCount > $offset;
  }
6136
6137
  /**
   * Returns the character at the given index. Offsets may be negative to
   * count from the last character in the string. Implements part of the
   * ArrayAccess interface, and throws an OutOfBoundsException if the index
   * does not exist.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string The character at the specified index.
   *
   * @throws \OutOfBoundsException If the positive or negative offset does not exist.
   */
  public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
  {
    // Measure in the caller's encoding so that the bounds check agrees with
    // the char_at() lookup below.
    $length = (int)self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
      throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
  }
6166
6167
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param string     $str        <p>The input string.</p>
   * @param int        $pad_length <p>The length of return string.</p>
   * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int|string $pad_type   [optional] <p>
   *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
   *                               or one of the strings 'left', 'right' or 'both'.
   *                               </p>
   * @param string     $encoding   [optional] <p>Default: UTF-8</p>
   *
   * @return string Returns the padded string. The input is returned unchanged if it is
   *                empty, if $pad_length is not positive or smaller than the input length,
   *                or if $pad_string is empty.
   *
   * @throws \InvalidArgumentException If a non-integer $pad_type is not 'left', 'right' or 'both'.
   */
  public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
  {
    if ('' === $str) {
      return '';
    }

    // Anything that is not an integer is treated as one of the string aliases.
    if ($pad_type !== (int)$pad_type) {
      if ($pad_type == 'left') {
        $pad_type = STR_PAD_LEFT;
      } elseif ($pad_type == 'right') {
        $pad_type = STR_PAD_RIGHT;
      } elseif ($pad_type == 'both') {
        $pad_type = STR_PAD_BOTH;
      } else {
        throw new \InvalidArgumentException(
            'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
        );
      }
    }

    $str_length = (int)self::strlen($str, $encoding);

    if ($pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = (int)self::strlen($pad_string, $encoding);
    if ($ps_length === 0) {
      // Nothing to pad with; this also avoids the division by zero below.
      return $str;
    }

    $diff = ($pad_length - $str_length);
    $pre = '';
    $post = '';

    // Each side is first over-filled with whole copies of the pad string and
    // then trimmed to the exact number of characters that are missing.
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff, $encoding);
        break;

      case STR_PAD_BOTH:
        // With an odd difference the extra character goes to the right side.
        $pre = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)\floor($diff / 2), $encoding);
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)\ceil($diff / 2), $encoding);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = \str_repeat($pad_string, (int)\ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff, $encoding);
    }

    return $pre . $str . $post;
  }
6238
6239
  /**
   * Pads the string on both sides up to the given length. When the number of
   * missing characters is odd, the right side receives the extra one.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with padding applied.
   */
  public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    $missing = $length - self::strlen($str, $encoding);

    $left = (int)\floor($missing / 2);
    $right = (int)\ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
  }
6256
6257
  /**
   * Returns a new string of a given length such that the beginning of the
   * string is padded.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with left padding.
   */
  public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure in the requested encoding, as str_pad_both() does, so that the
    // number of missing characters matches what apply_padding() works with.
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
  }
6272
6273
  /**
   * Returns a new string of a given length such that the end of the string
   * is padded.
   *
   * @param string $str
   * @param int    $length   <p>Desired string length after padding.</p>
   * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String with right padding.
   */
  public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
  {
    // Measure in the requested encoding, as str_pad_both() does, so that the
    // number of missing characters matches what apply_padding() works with.
    $missing = $length - (int)self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
  }
6288
6289
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat(string $str, int $multiplier): string
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
6313
6314
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replaces every occurrence of the search value with the replacement value.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The needle to look for; pass an array to search
   *                       for several needles at once.
   *                       </p>
   * @param mixed $replace <p>
   *                       The value that is put in place of each found needle;
   *                       pass an array to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The haystack: a string, or an array of strings.
   *                       </p>
   *                       <p>
   *                       For an array, every entry is processed and an
   *                       array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed A string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, int &$count = null)
  {
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
6347
6348
  /**
   * Replaces $search with $replacement if the string starts with $search.
   *
   * Both $search and $replacement are treated literally: $search is quoted
   * before it is used in the pattern, and the special characters of a regex
   * replacement ("\" and "$") are escaped in $replacement.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_beginning(string $str, string $search, string $replacement): string
  {
    // Backslashes must be escaped first, otherwise the backslash added in
    // front of "$" would be doubled again. Without escaping "$", a replacement
    // such as '$0' would be expanded as a back-reference.
    // NOTE(review): assumes regex_replace() hands the replacement to preg_replace() — confirm.
    $literalReplacement = self::str_replace(['\\', '$'], ['\\\\', '\\$'], $replacement);

    return self::regex_replace(
        $str,
        '^' . \preg_quote($search, '/'),
        $literalReplacement
    );
  }
6365
6366
  /**
   * Replaces $search with $replacement if the string ends with $search.
   *
   * Both $search and $replacement are treated literally: $search is quoted
   * before it is used in the pattern, and the special characters of a regex
   * replacement ("\" and "$") are escaped in $replacement.
   *
   * @param string $str         <p>The input string.</p>
   * @param string $search      <p>The string to search for.</p>
   * @param string $replacement <p>The replacement.</p>
   *
   * @return string String after the replacements.
   */
  public static function str_replace_ending(string $str, string $search, string $replacement): string
  {
    // Backslashes must be escaped first, otherwise the backslash added in
    // front of "$" would be doubled again. Without escaping "$", a replacement
    // such as '$0' would be expanded as a back-reference.
    // NOTE(review): assumes regex_replace() hands the replacement to preg_replace() — confirm.
    $literalReplacement = self::str_replace(['\\', '$'], ['\\\\', '\\$'], $replacement);

    return self::regex_replace(
        $str,
        \preg_quote($search, '/') . '$',
        $literalReplacement
    );
  }
6383
6384
  /**
   * Swaps the first occurrence of "$search" in "$subject" for "$replace".
   * The subject is returned untouched when "$search" does not occur.
   *
   * @param string $search
   * @param string $replace
   * @param string $subject
   *
   * @return string
   */
  public static function str_replace_first(string $search, string $replace, string $subject): string
  {
    $position = self::strpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6402
6403
  /**
   * Swaps the last occurrence of "$search" in "$subject" for "$replace".
   * The subject is returned untouched when "$search" does not occur.
   *
   * @param string $search
   * @param string $replace
   * @param string $subject
   *
   * @return string
   */
  public static function str_replace_last(string $search, string $replace, string $subject): string
  {
    $position = self::strrpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
6421
6422
  /**
   * Shuffles all the characters in the string.
   *
   * PS: uses random algorithm which is weak for cryptography purposes
   *
   * @param string $str <p>The input string</p>
   *
   * @return string The shuffled string.
   */
  public static function str_shuffle(string $str): string
  {
    // Without this guard range(0, -1) would produce the bogus index list [0, -1].
    if ('' === $str) {
      return '';
    }

    // Shuffle the character positions, then rebuild the string in that order.
    $indexes = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    $shuffledStr = '';
    foreach ($indexes as $i) {
      $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
  }
6444
6445
  /**
   * Returns the substring beginning at $start, and up to, but not including
   * the index specified by $end. If $end is omitted, the function extracts
   * the remaining string. Negative values of $start and $end are computed
   * from the end of the string.
   *
   * @param string $str
   * @param int    $start    <p>Initial index from which to begin extraction.</p>
   * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string|false
   *                     <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
   *                     characters long, <b>FALSE</b> will be returned.
   */
  public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
  {
    // Measure in the same encoding that substr() below will use.
    $strLength = (int)self::strlen($str, $encoding);

    // Turn a negative start into its absolute position first; otherwise the
    // "$end - $start" arithmetic below yields a length that is too large and
    // the character at $end would be included.
    if ($start < 0) {
      $start = \max(0, $strLength + $start);
    }

    if ($end === null) {
      $length = $strLength;
    } elseif ($end >= 0 && $end <= $start) {
      return '';
    } elseif ($end < 0) {
      $length = $strLength + $end - $start;
    } else {
      $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
  }
6474
6475
  /**
   * Convert a string to e.g.: "snake_case"
   *
   * Whitespace is normalized, dashes become underscores, every ASCII digit is
   * wrapped in underscores, every ASCII upper-case letter is lower-cased and
   * prefixed with an underscore, and finally runs of whitespace/underscores
   * are collapsed and trimmed.
   *
   * @param string $str
   * @param string $encoding [optional] <p>Default: UTF-8</p>
   *
   * @return string String in snake_case.
   */
  public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
  {
    $str = \str_replace('-', '_', self::normalize_whitespace($str));

    // NOTE(review): the "|" inside the character class is a literal pipe, so a
    // "|" in the input is matched as well and ends up prefixed with "_" — confirm intent.
    $str = (string)\preg_replace_callback(
        '/([\d|A-Z])/u',
        function ($matches) use ($encoding) {
          $char = $matches[1];

          // A match that survives the int round-trip is a digit.
          if ((string)(int)$char == $char) {
            return '_' . $char . '_';
          }

          return '_' . UTF8::strtolower($char, $encoding);
        },
        $str
    );

    $patterns = [
        '/\s+/',        // whitespace runs become "_"
        '/^\s+|\s+$/',  // leading & trailing whitespace is dropped
        '/_+/',         // repeated "_" collapse into one
    ];
    $replacements = ['_', '', '_'];
    $str = (string)\preg_replace($patterns, $replacements, $str);

    // First strip surrounding "_", then surrounding whitespace.
    return self::trim(self::trim($str, '_'));
  }
6522
6523
  /**
   * Orders the characters of a string by their code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string String of sorted characters.
   */
  public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice removes duplicate values.
      $codePoints = \array_flip(\array_flip($codePoints));
    }

    if (!$desc) {
      \asort($codePoints);
    } else {
      \arsort($codePoints);
    }

    return self::string($codePoints);
  }
6548
6549
  /**
   * Split a string into an array of pieces, each holding "$len" matches of
   * the GRAPHEME_CLUSTER_RX pattern (the last piece may be shorter).
   *
   * When an array is given, every entry is split and the array is returned
   * with the split results in place of the entries.
   *
   * @param string|string[] $str
   * @param int             $len <p>Piece length; values below 1 yield an empty array.</p>
   *
   * @return string[]
   */
  public static function str_split($str, int $len = 1): array
  {
    if ($len <= 0) {
      return [];
    }

    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::str_split($value, $len);
      }

      return $str;
    }

    if ('' === $str) {
      return [];
    }

    /** @noinspection NotOptimalRegularExpressionsInspection */
    \preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $units = $matches[0];

    if ($len === 1) {
      return $units;
    }

    // Group consecutive units into pieces of $len and glue each piece together.
    $pieces = [];
    foreach (\array_chunk($units, $len) as $chunk) {
      $pieces[] = \implode('', $chunk);
    }

    return $pieces;
  }
6597
6598
  /**
   * Splits the string at every occurrence of "$pattern" and returns the
   * pieces as plain strings. The pattern is quoted before it is used, so it
   * is matched literally. An optional integer $limit truncates the result.
   *
   * @param string $str
   * @param string $pattern <p>The separator at which to split the string.</p>
   * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
   *
   * @return string[] An array of strings.
   */
  public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
  {
    if ($limit === 0) {
      return [];
    }

    // Splitting errors when supplied an empty pattern in < PHP 5.4.13
    // and current versions of HHVM (3.8 and below).
    if ($pattern === '') {
      return [$str];
    }

    // With a limit, the final slot receives the unsplit remainder; request
    // one extra slot so that the remainder can be discarded afterwards.
    $limit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);
    if ($parts === false) {
      return [];
    }

    if ($limit > 0 && \count($parts) === $limit) {
      \array_pop($parts);
    }

    return $parts;
  }
6641
6642
  /**
   * Check if the string starts with the given substring.
   *
   * The comparison is byte-wise and case-sensitive. An empty haystack or an
   * empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with(string $haystack, string $needle): bool
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // Compare only the leading bytes instead of searching the whole haystack.
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
  }
6662
6663
  /**
6664
   * Returns true if the string begins with any of $substrings, false otherwise.
6665
   *
6666
   * - case-sensitive
6667
   *
6668
   * @param string $str        <p>The input string.</p>
6669
   * @param array  $substrings <p>Substrings to look for.</p>
6670
   *
6671
   * @return bool Whether or not $str starts with $substring.
6672
   */
6673 8
  public static function str_starts_with_any(string $str, array $substrings): bool
  {
    // Nothing can match when either the subject or the candidate list is empty.
    if ('' === $str || [] === $substrings) {
      return false;
    }

    // The first candidate that is a prefix of $str settles the answer.
    foreach ($substrings as $candidate) {
      if (self::str_starts_with($str, $candidate) === true) {
        return true;
      }
    }

    return false;
  }
6691
6692
  /**
6693
   * Gets the substring after the first occurrence of a separator.
6694
   *
6695
   * @param string $str       <p>The input string.</p>
6696
   * @param string $separator <p>The string separator.</p>
6697
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6698
   *
6699
   * @return string
6700
   */
6701 1
  public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    // An empty subject or an empty separator yields an empty result.
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the separator with the caller's $encoding, so that the offset is
    // counted in the same units as the strlen() / substr() calls below
    // (previously the search always ran with the default encoding).
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      // Separator not present.
      return '';
    }

    // Everything that follows the separator; a null length means "up to the end".
    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6723
6724
  /**
6725
   * Gets the substring after the last occurrence of a separator.
6726
   *
6727
   * @param string $str       <p>The input string.</p>
6728
   * @param string $separator <p>The string separator.</p>
6729
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6730
   *
6731
   * @return string
6732
   */
6733 1
  public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    // An empty subject or an empty separator yields an empty result.
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the last separator with the caller's $encoding, so that the offset
    // is counted in the same units as the strlen() / substr() calls below
    // (previously the search always ran with the default encoding).
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      // Separator not present.
      return '';
    }

    // Everything that follows the separator; a null length means "up to the end".
    return (string)self::substr(
        $str,
        $offset + (int)self::strlen($separator, $encoding),
        null,
        $encoding
    );
  }
6755
6756
  /**
6757
   * Gets the substring before the first occurrence of a separator.
6758
   *
6759
   * @param string $str       <p>The input string.</p>
6760
   * @param string $separator <p>The string separator.</p>
6761
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6762
   *
6763
   * @return string
6764
   */
6765 1
  public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    // An empty subject or an empty separator yields an empty result.
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the separator with the caller's $encoding, so that the offset is
    // counted in the same units as the substr() call below
    // (previously the search always ran with the default encoding).
    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      // Separator not present.
      return '';
    }

    // Everything in front of the separator.
    return (string)self::substr($str, 0, $offset, $encoding);
  }
6787
6788
  /**
6789
   * Gets the substring before the last occurrence of a separator.
6790
   *
6791
   * @param string $str       <p>The input string.</p>
6792
   * @param string $separator <p>The string separator.</p>
6793
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
6794
   *
6795
   * @return string
6796
   */
6797 1
  public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
  {
    // An empty subject or an empty separator yields an empty result.
    if ('' === $separator || '' === $str) {
      return '';
    }

    // Locate the last separator with the caller's $encoding, so that the offset
    // is counted in the same units as the substr() call below
    // (previously the search always ran with the default encoding).
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
      // Separator not present.
      return '';
    }

    // Everything in front of the last separator.
    return (string)self::substr($str, 0, $offset, $encoding);
  }
6819
6820
  /**
6821
   * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
6822
   *
6823
   * @param string $str          <p>The input string.</p>
6824
   * @param string $needle       <p>The string to look for.</p>
6825
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
6826
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
6827
   *
6828
   * @return string
6829
   */
6830 2
  public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // Without a subject or a needle there is nothing to extract.
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

    // A false result from strstr() is mapped to the empty string.
    return $found === false ? '' : $found;
  }
6852
6853
  /**
6854
   * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
6855
   *
6856
   * @param string $str          <p>The input string.</p>
6857
   * @param string $needle       <p>The string to look for.</p>
6858
   * @param bool   $beforeNeedle [optional] <p>Default: false</p>
6859
   * @param string $encoding     [optional] <p>Default: UTF-8</p>
6860
   *
6861
   * @return string
6862
   */
6863 2
  public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
  {
    // Without a subject or a needle there is nothing to extract.
    if ($str === '' || $needle === '') {
      return '';
    }

    $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    // A false result from strrchr() is mapped to the empty string.
    return $found === false ? '' : $found;
  }
6880
6881
  /**
6882
   * Surrounds $str with the given substring.
6883
   *
6884
   * @param string $str
6885
   * @param string $substring <p>The substring to add to both sides.</p>
6886
   *
6887
   * @return string String with the substring both prepended and appended.
6888
   */
6889 5
  public static function str_surround(string $str, string $substring): string
  {
    // The same substring is placed in front of and behind $str.
    return $substring . $str . $substring;
  }
6893
6894
  /**
6895
   * Returns a trimmed string with the first letter of each word capitalized.
6896
   * Also accepts an array, $ignore, allowing you to list words not to be
6897
   * capitalized.
6898
   *
6899
   * @param string              $str
6900
   * @param string[]|array|null $ignore   [optional] <p>An array of words not to capitalize or null. Default: null</p>
6901
   * @param string              $encoding [optional] <p>Default: UTF-8</p>
6902
   *
6903
   * @return string The titleized string.
6904
   */
6905 5
  public static function str_titleize(string $str, array $ignore = null, string $encoding = 'UTF-8'): string
  {
    $str = self::trim($str);

    // Rewrite every run of non-whitespace characters ("word") individually.
    return (string)\preg_replace_callback(
        '/([\S]+)/u',
        function ($match) use ($encoding, $ignore) {
          // Words on the ignore list are passed through untouched
          // (strict, case-sensitive comparison).
          if ($ignore && \in_array($match[0], $ignore, true)) {
            return $match[0];
          }

          // $encoding is forwarded to *both* helpers, so that lower-casing the
          // word and upper-casing its first character agree on the charset
          // (previously str_upper_first() always ran with its default).
          return self::str_upper_first(
              self::strtolower($match[0], $encoding),
              $encoding
          );
        },
        $str
    );
  }
6923
6924
  /**
6925
   * Returns a trimmed string in proper title case.
6926
   *
6927
   * Also accepts an array, $ignore, allowing you to list words not to be
6928
   * capitalized.
6929
   *
6930
   * Adapted from John Gruber's script.
6931
   *
6932
   * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
6933
   *
6934
   * @param string $str
6935
   * @param array  $ignore   <p>An array of words not to capitalize.</p>
6936
   * @param string $encoding [optional] <p>Default: UTF-8</p>
6937
   *
6938
   * @return string The titleized string.
6939
   */
6940 35
  public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
  {
    // "Small words" are given as regex fragments; the caller's $ignore entries
    // are appended to the built-in list and end up in the same alternation.
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // When has_lowercase() reports false the whole string is lower-cased first.
    // $encoding is forwarded so this step uses the same charset as the
    // callbacks below (previously the default encoding was always used here).
    if (self::has_lowercase($str) === false) {
      $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string)\preg_replace_callback(
        '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
        function ($matches) use ($encoding) {
          // Preserve leading underscore
          $word = $matches[1];

          if ($matches[2]) {
            // Preserve URLs, domains, emails and file paths
            $word .= $matches[2];
          } elseif ($matches[3]) {
            // Lower-case small words
            $word .= self::strtolower($matches[3], $encoding);
          } elseif ($matches[4]) {
            // Capitalize word w/o internal caps
            $word .= static::str_upper_first($matches[4], $encoding);
          } else {
            // Preserve other kinds of word (iPhone)
            $word .= $matches[5];
          }

          // Preserve trailing underscore
          $word .= $matches[6];

          return $word;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string)\preg_replace_callback(
        '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
        function ($matches) use ($encoding) {
          // Keep the leading context, upper-case the small word behind it.
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string)\preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string)\preg_replace_callback(
        '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
        function ($matches) use ($encoding) {
          return static::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind below tests for the ellipsis character "…"
    // although its inline comment talks about a hyphen. It is left unchanged
    // here because altering it would change which words get capitalized - confirm the intent.
    $str = (string)\preg_replace_callback(
        '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
        function ($matches) use ($encoding) {
          // Keep "Word-" as is, upper-case the small word that follows it.
          return $matches[1] . static::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
  }
7068
7069
  /**
7070
   * Get a binary representation of a specific string.
7071
   *
7072
   * @param string $str <p>The input string.</p>
7073
   *
7074
   * @return string
7075
   */
7076 2
  public static function str_to_binary(string $str): string
  {
    // Build the bit string byte by byte (8 zero-padded bits per byte).
    // base_convert() is avoided on purpose: it loses precision on large
    // numbers, which corrupted the result for longer inputs.
    $bits = '';
    $byteCount = \strlen($str);
    for ($i = 0; $i < $byteCount; ++$i) {
      $bits .= \sprintf('%08b', \ord($str[$i]));
    }

    // Keep the former output shape: no leading zeros, and "0" when nothing
    // but zeros (or nothing at all) is left.
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
7082
7083
  /**
7084
   * @param string   $str
7085
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
7086
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
7087
   *
7088
   * @return string[]
7089
   */
7090 17
  public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // Result used whenever no lines can be produced at all.
    $fallback = $removeEmptyValues === true ? [] : [''];

    if ($str === '') {
      return $fallback;
    }

    // NOTE: one or two consecutive CR / LF characters count as a single break.
    $lines = \preg_split("/[\r\n]{1,2}/u", $str);
    if ($lines === false) {
      return $fallback;
    }

    // Without any filter option the raw split result is returned directly.
    $needsFiltering = $removeShortValues !== null || $removeEmptyValues !== false;
    if (!$needsFiltering) {
      return $lines;
    }

    return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
  }
7118
7119
  /**
7120
   * Convert a string into an array of words.
7121
   *
7122
   * @param string   $str
7123
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
7124
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
7125
   * @param null|int $removeShortValues <p>The min. string length or null to disable</p>
7126
   *
7127
   * @return string[]
7128
   */
7129 14
  public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
  {
    // Result used whenever no parts can be produced at all.
    $fallback = $removeEmptyValues === true ? [] : [''];

    if ($str === '') {
      return $fallback;
    }

    // Character class of "word" characters: letters plus the caller's extras.
    $wordChars = self::rxClass($charList, '\pL');

    // The words themselves are captured, so the result alternates between the
    // text around the words (even indexes) and the words (odd indexes).
    $pieces = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($pieces === false) {
      return $fallback;
    }

    // Without any filter option the raw split result is returned directly.
    $needsFiltering = $removeShortValues !== null || $removeEmptyValues !== false;
    if (!$needsFiltering) {
      return $pieces;
    }

    $reduced = self::reduce_string_array($pieces, $removeEmptyValues, $removeShortValues);

    // Every remaining entry is cast to string; keys are preserved.
    return \array_map(
        static function ($piece) {
          return (string)$piece;
        },
        $reduced
    );
  }
7163
7164
  /**
7165
   * alias for "UTF8::to_ascii()"
7166
   *
7167
   * @see UTF8::to_ascii()
7168
   *
7169
   * @param string $str
7170
   * @param string $unknown
7171
   * @param bool   $strict
7172
   *
7173
   * @return string
7174
   */
7175 7
  public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
  {
    // Pure alias: all arguments are handed to to_ascii() unchanged.
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
7179
7180
  /**
7181
   * Truncates the string to a given length. If $substring is provided, and
7182
   * truncating occurs, the string is further truncated so that the substring
7183
   * may be appended without exceeding the desired length.
7184
   *
7185
   * @param string $str
7186
   * @param int    $length    <p>Desired length of the truncated string.</p>
7187
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
7188
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
7189
   *
7190
   * @return string String after truncating.
7191
   */
7192 22
  public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // $str is untyped in the signature; normalise it to a string first.
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // Short enough already: nothing to cut, nothing to append.
    if ($length >= self::strlen($str, $encoding)) {
      return $str;
    }

    $substringLength = (int)self::strlen($substring, $encoding);

    // The substring is only appended "if it can fit". When it is longer than
    // the requested length, subtracting it would give a negative length, which
    // substr() counts from the end - the result would be longer than asked for.
    // In that case the string is cut to $length and the substring is dropped.
    if ($length >= 0 && $substringLength > $length) {
      return (string)self::substr($str, 0, $length, $encoding);
    }

    // Reserve room for the substring, then cut.
    $truncated = self::substr($str, 0, $length - $substringLength, $encoding);

    // substr() may return false; the cast keeps the concatenation well-typed.
    return (string)$truncated . $substring;
  }
7213
7214
  /**
7215
   * Truncates the string to a given length, while ensuring that it does not
7216
   * split words. If $substring is provided, and truncating occurs, the
7217
   * string is further truncated so that the substring may be appended without
7218
   * exceeding the desired length.
7219
   *
7220
   * @param string $str
7221
   * @param int    $length    <p>Desired length of the truncated string.</p>
7222
   * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
7223
   * @param string $encoding  [optional] <p>Default: UTF-8</p>
7224
   *
7225
   * @return string String after truncating.
7226
   */
7227 23
  public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
  {
    // Short enough already: nothing to cut, nothing to append.
    if (self::strlen($str, $encoding) <= $length) {
      return $str;
    }

    // Reserve room for the substring that gets appended at the end.
    $length -= self::strlen($substring, $encoding);

    $head = self::substr($str, 0, $length, $encoding);
    if (false === $head) {
      return '';
    }

    // Look for a space starting one position before the cut. If the next space
    // is not located exactly at the cut position, the last word was split.
    // (The loose comparison is intentional and kept as is.)
    $nextSpace = self::strpos($str, ' ', $length - 1, $encoding);
    if ($nextSpace != $length) {
      // Fall back to the last space inside the already truncated part.
      $lastSpace = self::strrpos($head, ' ', 0, $encoding);

      if (false !== $lastSpace || false !== $nextSpace) {
        $head = self::substr($head, 0, (int)$lastSpace, $encoding);
      }
    }

    return $head . $substring;
  }
7257
7258
  /**
7259
   * Returns a lowercase and trimmed string separated by underscores.
7260
   * Underscores are inserted before uppercase characters (with the exception
7261
   * of the first character of the string), and in place of spaces as well as
7262
   * dashes.
7263
   *
7264
   * @param string $str
7265
   *
7266
   * @return string The underscored string.
7267
   */
7268 16
  public static function str_underscored(string $str): string
  {
    // Delegates to str_delimit() with the underscore as the delimiter.
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
  }
7272
7273
  /**
7274
   * Returns an UpperCamelCase version of the supplied string. It trims
7275
   * surrounding spaces, capitalizes letters following digits, spaces, dashes
7276
   * and underscores, and removes spaces, dashes, underscores.
7277
   *
7278
   * @param string $str      <p>The input string.</p>
7279
   * @param string $encoding [optional] <p>Default: UTF-8</p>
7280
   *
7281
   * @return string String in UpperCamelCase.
7282
   */
7283 13
  public static function str_upper_camelize(string $str, string $encoding = 'UTF-8'): string
  {
    // Step 1: camelize the string.
    $camelized = self::str_camelize($str, $encoding);

    // Step 2: upper-case its first character.
    return self::str_upper_first($camelized, $encoding);
  }
7287
7288
  /**
7289
   * alias for "UTF8::ucfirst()"
7290
   *
7291
   * @see UTF8::ucfirst()
7292
   *
7293
   * @param string $str
7294
   * @param string $encoding
7295
   * @param bool   $cleanUtf8
7296
   *
7297
   * @return string
7298
   */
7299 58
  public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // Pure alias: all arguments are handed to ucfirst() unchanged.
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
  }
7303
7304
  /**
7305
   * Counts number of words in the UTF-8 string.
7306
   *
7307
   * @param string $str      <p>The input string.</p>
7308
   * @param int    $format   [optional] <p>
7309
   *                         <strong>0</strong> => return a number of words (default)<br>
7310
   *                         <strong>1</strong> => return an array of words<br>
7311
   *                         <strong>2</strong> => return an array of words with word-offset as key
7312
   *                         </p>
7313
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
7314
   *
7315
   * @return string[]|int The number of words in the string
7316
   */
7317 2
  public static function str_word_count(string $str, int $format = 0, string $charlist = '')
  {
    // Only the odd indexes of the str_to_words() result are treated as words.
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    switch ($format) {
      case 1:
        // Plain list of words.
        $words = [];
        for ($idx = 1; $idx < $partCount; $idx += 2) {
          $words[] = $parts[$idx];
        }

        return $words;

      case 2:
        // Words keyed by their offset in the input string.
        $wordsByOffset = [];
        $position = self::strlen($parts[0]);
        for ($idx = 1; $idx < $partCount; $idx += 2) {
          $wordsByOffset[$position] = $parts[$idx];
          // Skip the word itself and the part that follows it.
          $position += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
        }

        return $wordsByOffset;

      default:
        // Number of words only.
        return (int)(($partCount - 1) / 2);
    }
  }
7347
7348
  /**
7349
   * Case-insensitive string comparison.
7350
   *
7351
   * INFO: Case-insensitive version of UTF8::strcmp()
7352
   *
7353
   * @param string $str1     <p>The first string.</p>
7354
   * @param string $str2     <p>The second string.</p>
7355
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7356
   *
7357
   * @return int
7358
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
7359
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
7360
   *             <strong>0</strong> if they are equal.
7361
   */
7362 23
  public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    // Case-fold both operands with identical options ...
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    // ... and compare the folded forms case-sensitively.
    return self::strcmp($folded1, $folded2);
  }
7369
7370
  /**
7371
   * alias for "UTF8::strstr()"
7372
   *
7373
   * @see UTF8::strstr()
7374
   *
7375
   * @param string $haystack
7376
   * @param string $needle
7377
   * @param bool   $before_needle
7378
   * @param string $encoding
7379
   * @param bool   $cleanUtf8
7380
   *
7381
   * @return string|false
7382
   */
7383 2
  public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: all arguments are handed to strstr() unchanged.
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7387
7388
  /**
7389
   * Case-sensitive string comparison.
7390
   *
7391
   * @param string $str1 <p>The first string.</p>
7392
   * @param string $str2 <p>The second string.</p>
7393
   *
7394
   * @return int
7395
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
7396
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
7397
   *              <strong>0</strong> if they are equal.
7398
   */
7399 29
  public static function strcmp(string $str1, string $str2): int
  {
    // Byte-identical strings are equal; no normalisation needed.
    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() returns false for input it cannot process (e.g. invalid
    // UTF-8). Fall back to the raw string instead of handing false to
    // \strcmp(), so that the comparison stays well-typed and meaningful.
    return \strcmp(
        $normalized1 === false ? $str1 : $normalized1,
        $normalized2 === false ? $str2 : $normalized2
    );
  }
7407
7408
  /**
7409
   * Find length of initial segment not matching mask.
7410
   *
7411
   * @param string $str
7412
   * @param string $charList
7413
   * @param int    $offset
7414
   * @param int    $length
7415
   *
7416
   * @return int|null
7417
   */
7418 15
  public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
  {
    // An empty mask has no result.
    if ('' === $charList) {
      return null;
    }

    // Restrict the subject to the requested window first.
    if ($offset || $length !== null) {
      $segment = self::substr($str, $offset, $length);
      if ($segment === false) {
        return null;
      }
      $str = (string)$segment;
    }

    if ('' === $str) {
      return null;
    }

    // Lazily capture everything in front of the first character of the mask.
    // A dedicated $matches array is used here; the int parameter $length is no
    // longer overwritten with the preg_match() result.
    $matches = [];
    if (\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      $segmentLength = self::strlen($matches[1]);
    } else {
      // No mask character found: the whole string is the initial segment.
      $segmentLength = self::strlen($str);
    }

    // strlen() may return false; map that to the documented null.
    return $segmentLength === false ? null : $segmentLength;
  }
7442
7443
  /**
7444
   * alias for "UTF8::stristr()"
7445
   *
7446
   * @see UTF8::stristr()
7447
   *
7448
   * @param string $haystack
7449
   * @param string $needle
7450
   * @param bool   $before_needle
7451
   * @param string $encoding
7452
   * @param bool   $cleanUtf8
7453
   *
7454
   * @return string|false
7455
   */
7456 1
  public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    // Pure alias: all arguments are handed to stristr() unchanged.
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
7460
7461
  /**
7462
   * Create a UTF-8 string from code points.
7463
   *
7464
   * INFO: opposite to UTF8::codepoints()
7465
   *
7466
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
7467
   *
7468
   * @return string UTF-8 encoded string.
7469
   */
7470 4
  public static function string(array $array): string
  {
    // Callable pointing at this class' chr() method.
    $toChar = [self::class, 'chr'];

    // Convert every entry on its own ...
    $chars = \array_map($toChar, $array);

    // ... and glue the results together without a separator.
    return \implode('', $chars);
  }
7483
7484
  /**
7485
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
7486
   *
7487
   * @param string $str <p>The input string.</p>
7488
   *
7489
   * @return bool
7490
   *              <strong>true</strong> if the string has BOM at the start,<br>
7491
   *              <strong>false</strong> otherwise.
7492
   */
7493 6
  public static function string_has_bom(string $str): bool
  {
    // Only the keys of self::$BOM are needed; the values are not used here.
    foreach (\array_keys(self::$BOM) as $bomString) {
      // A hit counts only at the very beginning of the string.
      if (\strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
7503
7504
  /**
   * Remove HTML and PHP tags from a string, optionally cleaning invalid UTF-8 beforehand.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str             <p>The input string.</p>
   * @param string $allowable_tags  [optional] <p>
   *                                Tags listed here are kept instead of being stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are always stripped; this is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param bool   $cleanUtf8       [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The stripped string (an empty input is returned as an empty string right away).
   */
  public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    // Clean first (when requested), then delegate to the native function.
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
  }
7536
7537
  /**
   * Remove every whitespace character from the string. Tabs and newlines are
   * covered, and so is multibyte whitespace such as the thin space and the
   * ideographic space.
   *
   * @param string $str
   *
   * @return string The input without any characters matched by "[[:space:]]" (unicode mode).
   */
  public static function strip_whitespace(string $str): string
  {
    if ($str === '') {
      return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    // preg_replace() yields null on failure; the cast turns that into an empty string.
    return (string)$stripped;
  }
7554
7555
  /**
   * Case-insensitive search for the first occurrence of a string inside another one.
   *
   * The lookup is attempted with mbstring first, then with intl ("grapheme_stripos()"), then with the
   * native "stripos()" for pure ASCII input and finally by case-folding both strings and asking UTF8::strpos().
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string $haystack  <p>The string to search in.</p>
   * @param string $needle    <p>The string to search for.</p>
   * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> position of the first match,<br>
   *                   or <strong>false</strong> if haystack / needle is empty or nothing was found.
   */
  public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if (true === $cleanUtf8) {
      // "mb_strpos()" and "iconv_strpos()" report a wrong position
      // when invalid characters precede the needle in the haystack
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // the two most common encoding names are used as-is, everything else gets normalized
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // 1st try: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $position = \mb_stripos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 2nd try: intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_stripos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $position = \grapheme_stripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // 3rd try: native function, ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return \stripos($haystack, $needle, $offset);
    }

    //
    // last resort: case-fold both sides and run a case-sensitive search
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
  }
7628
7629
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case-insensitive).
   *
   * The work is delegated to "mb_stristr()" when mbstring is available. Otherwise "grapheme_stristr()"
   * (UTF-8 + intl only), the native "stristr()" (ASCII only) and finally a regular expression are tried.
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, it returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (or if haystack / needle is empty on entry).
   */
  public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if cleaning removed all of it.
    // Compare against '' explicitly: a loose check would also treat the needle "0" as empty.
    if ('' === $needle) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture everything in front of the first case-insensitive match
    //

    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // UTF8::strlen() may report false; never forward that as an offset.
    $prefixLength = self::strlen($match[1]);
    if ($prefixLength === false) {
      return false;
    }

    return self::substr($haystack, $prefixLength);
  }
7707
7708
  /**
   * Count the characters of a string (not its bytes).
   *
   * Tried in this order: mbstring, iconv, intl ("grapheme_strlen()", UTF-8 only), the native "strlen()"
   * for pure ASCII input and finally a regular expression that matches one character at a time.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string $str       <p>The string being checked for length.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *             The number <strong>(int)</strong> of characters in the string $str having character encoding $encoding.
   *             (One multi-byte character counted as +1).
   *             <br>
   *             Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid chars.
   */
  public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($str === '') {
      return 0;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // binary || ascii only: count the bytes
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
      return self::strlen_in_byte($str);
    }

    if (true === $cleanUtf8) {
      // "mb_strlen" and "\iconv_strlen" return a wrong length
      // if the string contains invalid characters
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // 1st try: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $length = \mb_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    //
    // 2nd try: iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    //
    // 3rd try: intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    //
    // 4th try: native function, ascii only
    //

    if (self::is_ascii($str)) {
      return \strlen($str);
    }

    //
    // last resort: let the regex engine split the string into characters
    //

    \preg_match_all('/./us', $str, $parts);

    $length = \count($parts[0]);

    // nothing matched although the string is not empty => the length can not be determined
    if ($length === 0 && $str !== '') {
      return false;
    }

    return $length;
  }
7823
7824
  /**
   * Count the bytes of a string.
   *
   * @param string $str
   *
   * @return int The byte length; 0 for an empty string.
   */
  public static function strlen_in_byte(string $str): int
  {
    if ('' === $str) {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // With "mbstring.func_overload" in use the "mb_" functions are available,
    // so ask mbstring for the length in an 8-bit encoding; otherwise use the native function.
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850') // 8-BIT
        : \strlen($str);
  }
7848
7849
  /**
   * "Natural order" comparison of two strings that ignores case.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * Both strings are case-folded via UTF8::strtocasefold() and then compared with UTF8::strnatcmp().
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded1, $folded2);
  }
7870
7871
  /**
   * Compare two strings with a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp(string $str1, string $str2): int
  {
    // identical strings need no folding at all
    if ($str1 === $str2) {
      return 0;
    }

    return \strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
7890
7891
  /**
   * Compare the first n characters of two strings, ignoring case.
   *
   * Both strings are case-folded via UTF8::strtocasefold() and then handed to UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1     <p>The first string.</p>
   * @param string $str2     <p>The second string.</p>
   * @param int    $len      <p>The length of strings to be used in the comparison.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
  {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($folded1, $folded2, $len);
  }
7914
7915
  /**
   * Compare the first n characters of two strings.
   *
   * Each string is cut down with UTF8::substr() and the two heads are compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp(string $str1, string $str2, int $len): int
  {
    // the casts turn a non-string result of substr() into a string before comparing
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
7936
7937
  /**
   * Find the first character of the haystack that is contained in a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false
   *                      The rest of the haystack beginning at the character found,<br>
   *                      or false if nothing matches or one of the arguments is empty.
   */
  public static function strpbrk(string $haystack, string $char_list)
  {
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $found)) {
      return false;
    }

    // locate the matched character again to get the offset to cut at
    $cutAt = (int)\strpos($haystack, $found[0]);

    return \substr($haystack, $cutAt);
  }
7959
7960
  /**
   * Find position of first occurrence of string in a string.
   *
   * Tried in this order: byte-wise search (CP850 / ASCII encoding only), mbstring, intl, iconv,
   * the native "strpos()" for pure ASCII input and finally a byte search whose result is converted
   * back into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
   *                   string.<br> If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if (\is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      return self::strpos_in_byte($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only
    //

    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
      return \strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // Turn a negative offset (counted from the end) into the absolute character index where the
    // search starts. Without this the position found in the tail would be reported as if the tail
    // started at index 0 of the haystack.
    if ($offset < 0) {
      $haystackLength = $haystackIsAscii ? \strlen($haystack) : (int)self::strlen($haystack, $encoding);
      $offset = \max(0, $haystackLength + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = \substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // $pos is a byte offset inside the tail: count the characters in front of the match
    // (0 for a match right at the start) and add the characters that were skipped.
    return $offset + (int)self::strlen(\substr($haystack, 0, $pos), $encoding);
  }
8110
8111
  /**
   * Find the byte position of the first occurrence of a string in a string.
   *
   * @param string $haystack <p>
   *                         The string being checked.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to search for.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The search offset. If it is not specified, 0 is used.
   *                         </p>
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string. If needle is not found (or one of the strings
   *                   is empty), it returns false.
   */
  public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // With "mbstring.func_overload" in use the "mb_" functions are available,
    // so search via mbstring in an 8-bit encoding; otherwise use the native function.
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
        : \strpos($haystack, $needle, $offset);
  }
8144
8145
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * With mbstring available the call is delegated to "mb_strrchr()". The fallbacks below it
   * only search for the first character of the needle.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Selects the portion of haystack that is returned.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (true === $cleanUtf8) {
      // "mb_strpos()" and "iconv_strpos()" report a wrong position
      // when invalid characters precede the needle in the haystack
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // binary || ascii only (the native function has no "before needle" mode)
    //

    $isByteEncoding = ($encoding === 'CP850' || $encoding === 'ASCII');
    if ($before_needle === false && $isByteEncoding) {
      return \strrchr($haystack, $needle);
    }

    //
    // iconv / vanilla php: look for the first character of the needle only
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    if (self::$SUPPORT['iconv'] === true) {
      $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
      $lastPos = self::strrpos($haystack, $needle, null, $encoding);
    }

    if ($lastPos === false) {
      return false;
    }

    if ($before_needle) {
      return self::substr($haystack, 0, $lastPos, $encoding);
    }

    return self::substr($haystack, $lastPos, null, $encoding);
  }
8258
8259
  /**
   * Reverses characters order in the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string The string with characters in the reverse sequence.
   */
  public static function strrev(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    // Fast path: let the regex engine split the string into characters in a single pass and
    // reverse that list, instead of extracting every character with a separate substr() call.
    // In unicode mode the match fails when the input is not valid UTF-8.
    if (\preg_match_all('/./us', $str, $chars) > 0) {
      return \implode('', \array_reverse($chars[0]));
    }

    // Slow path for input the regex could not process: walk backwards character by character.
    // UTF8::strlen() may report false, which is treated as "nothing to reverse".
    $reversed = '';
    $i = (int)self::strlen($str);
    while ($i--) {
      $reversed .= self::substr($str, $i, 1);
    }

    return $reversed;
  }
8280
8281
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * With mbstring available the call is delegated to "mb_strrichr()". The fallback below it
   * only searches for the first character of the needle.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack       <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle         <p>The string to find in haystack.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               Selects the portion of haystack that is returned.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or<br>false if needle is not found.
   */
  public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (true === $cleanUtf8) {
      // "mb_strpos()" and "iconv_strpos()" report a wrong position
      // when invalid characters precede the needle in the haystack
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // vanilla php: look for the first character of the needle only
    //

    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
      return false;
    }
    $needle = (string)$firstChar;

    $lastPos = self::strripos($haystack, $needle, 0, $encoding);
    if ($lastPos === false) {
      return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
  }
8351
8352
  /**
   * Find the position of the last occurrence of a string inside another string, ignoring case.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is passed through UTF8::chr()
   *                              first.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
   */
  public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ($haystack === '') {
      return false;
    }

    // iconv and mbstring do not support an integer $needle
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // binary || ascii only: work on bytes
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    //
    // next: intl
    //

    // "grapheme_strripos()" can't handle other encodings or a negative offset
    $intlUsable = $encoding === 'UTF-8'
                  &&
                  $offset >= 0
                  &&
                  self::$SUPPORT['intl'] === true;
    if ($intlUsable) {
      $position = \grapheme_strripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    //
    // next: ascii only input can be searched byte-wise
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strripos_in_byte($haystack, $needle, $offset);
    }

    //
    // last resort: case-fold both strings and do a case-sensitive search
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

    return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
  }
8457
8458
  /**
   * Byte-wise variant of strripos(): position of the last case-insensitive occurrence of needle in haystack.
   *
   * @param string $haystack <p>
   *                         The string from which to get the position of the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The position in haystack
   *                         to start searching.
   *                         </p>
   *
   * @return int|false Return the numeric position of the last occurrence of needle in the
   *                   haystack string, or false if needle is not found (or an argument is empty).
   */
  public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \strripos($haystack, $needle, $offset);
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return \mb_strripos($haystack, $needle, $offset, 'CP850');
  }
8493
8494
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.<br>
   *                              <b>null</b> (default) searches the whole string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false
   *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
   *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
   */
  public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
  {
    if ('' === $haystack) {
      return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }
    $needle = (string)$needle;

    if ('' === $needle) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
      // strrpos_in_byte() only accepts an int offset, so the "null" default must be mapped to 0
      return self::strrpos_in_byte($haystack, $needle, $offset ?? 0);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      // mb_strrpos() expects an int offset, "null" means "no offset"
      return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
    }

    //
    // fallback via intl (only used when an explicit offset was given)
    //

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
      $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    //
    // fallback for ascii only (only used when an explicit offset was given)
    //

    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
      return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // cut the haystack down to the part that may be searched:
    // - positive offset: drop the first $offset characters (re-added to the result below)
    // - negative offset: drop the last |$offset| characters
    $haystackTmp = null;
    if ($offset > 0) {
      $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystackTmp = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($haystackTmp !== null) {
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    $pos = self::strrpos_in_byte($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position
    return (int)$offset + (int)self::strlen(self::substr_in_byte($haystack, 0, $pos));
  }
8629
8630
  /**
   * Byte-wise variant of strrpos(): position of the last occurrence of needle in haystack.
   *
   * @param string $haystack <p>
   *                         The string being checked, for the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
   *                         the string. Negative values will stop searching at an arbitrary point
   *                         prior to the end of the string.
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found (or an argument is empty), it returns false.
   */
  public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
  {
    $hasEmptyArgument = ('' === $haystack || '' === $needle);
    if ($hasEmptyArgument) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset,
    // otherwise the native function already works on bytes
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
        : \strrpos($haystack, $needle, $offset);
  }
8664
8665
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string   $str    <p>The input string.</p>
   * @param string   $mask   <p>The mask of chars</p>
   * @param int      $offset [optional] <p>Start position inside $str, applied via UTF8::substr().</p>
   * @param int|null $length [optional] <p>Length of the examined part of $str, applied via UTF8::substr().</p>
   *
   * @return int
   *             The length of the initial segment, or 0 if the (sliced) string or the mask is empty or nothing
   *             matches.
   */
  public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
  {
    // only slice the input if the caller asked for a sub-range
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    if ('' === $str || '' === $mask) {
      return 0;
    }

    // use a dedicated variable for the matches instead of overwriting the input string
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    // strlen() may return false, but this method must always return an int
    return (int)self::strlen($matches[0]);
  }
8692
8693
  /**
   * Returns the part of haystack starting at the first occurrence of needle (or the part before it).
   *
   * @param string $haystack       <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle         <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle  [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false
   *                       A sub-string,<br>or <strong>false</strong> if needle is not found
   *                       (or haystack / needle is empty).
   */
  public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" return a wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // binary || ascii only: work on bytes
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // next: intl ("grapheme_strstr()" can't handle other encodings)
    //

    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
      $intlResult = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($intlResult !== false) {
        return $intlResult;
      }
    }

    //
    // next: ascii only input can be searched byte-wise
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    //
    // last resort: capture everything in front of the first occurrence of the needle
    //

    $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
    \preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $prefix = $match[1];

    if ($before_needle) {
      return $prefix;
    }

    // skip the prefix, the rest starts with the needle
    return self::substr($haystack, self::strlen($prefix));
  }
8796
8797
  /**
   * Byte-wise variant of strstr(): finds the first occurrence of a string within another.
   *
   * @param string $haystack      <p>
   *                              The string from which to get the first occurrence
   *                              of needle.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to find in haystack.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the first occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the first occurrence of needle to the end.
   *                              </p>
   *
   * @return string|false The portion of haystack,
   *                      or false if needle is not found (or an argument is empty).
   */
  public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

    if (!$overloaded) {
      return \strstr($haystack, $needle, $before_needle);
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
  }
8836
8837
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is first passed through UTF8::fixStrCaseHelper() and then lower- or uppercased.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string      $str       <p>The input string.</p>
   * @param bool        $full      [optional] <p>
   *                               <b>true</b>, replace full case folding chars (default)<br>
   *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                               </p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase is
   *                               for some languages better ...</p>
   *
   * @return string
   */
  public static function strtocasefold(
      string $str,
      bool $full = true,
      bool $cleanUtf8 = false,
      string $encoding = 'UTF-8',
      string $lang = null,
      $lower = true
  ): string
  {
    if ($str === '') {
      return '';
    }

    $folded = self::fixStrCaseHelper($str, $lower, $full);

    // only a strict "true" selects lowercase, everything else ends up uppercased
    return $lower === true
        ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
        : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
  }
8876
8877
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str                   <p>The string being lowercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr<br>
   *                                           Handled via an intl transliterator; a warning is triggered if intl
   *                                           is not available or the language is unknown.</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is handed to "mb_" / intl
      $str = self::clean($str);
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, true);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] !== true) {

        // no intl: warn and continue with the generic lowercasing below
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);

      } else {

        $transliteratorId = $lang . '-Lower';
        $isSupportedId = \in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true);
        if (!$isSupportedId) {
          \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the language independent transliterator
          $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return transliterator_transliterate($transliteratorId, $str);
      }
    }

    // always fallback via symfony polyfill
    return \mb_strtolower($str, $encoding);
  }
8939
8940
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold(string $str): string
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return \preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
8952
8953
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str                   <p>The string being uppercased.</p>
   * @param string      $encoding              [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr<br>
   *                                           Handled via an intl transliterator; a warning is triggered if intl
   *                                           is not available or the language is unknown.</p>
   * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
   *
   * @return string String with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
  {
    // init
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is handed to "mb_" / intl
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
      $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['intl'] === true) {

        $langCode = $lang . '-Upper';
        if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          // intl is available here, only the requested language is unknown
          \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the language independent transliterator
          $langCode = 'Any-Upper';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return transliterator_transliterate($langCode, $str);
      }

      // no intl: warn and continue with the generic uppercasing below
      \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
  }
9015
9016
  /**
   * Translate characters or replace sub-strings.
   *
   * If $to is given, $from and $to are split via UTF8::str_split(), cut to the same number of elements and used
   * as a replacement map. If $to is omitted, a string $from is removed from $str and an array $from is used as
   * replacement map for the native strtr().
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   */
  public static function strtr(string $str, $from, $to = INF): string
  {
    if ($str === '') {
      return '';
    }

    // nothing to do if source and target are identical
    if ($from === $to) {
      return $str;
    }

    if ($to !== INF) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // both lists must have the same size: surplus elements of the longer one are dropped
      $pairCount = \min(\count($fromChars), \count($toChars));

      $from = \array_combine(
          \array_slice($fromChars, 0, $pairCount),
          \array_slice($toChars, 0, $pairCount)
      );
    }

    // a plain string without a target: remove it from the input
    if (\is_string($from)) {
      return \str_replace($from, '', $str);
    }

    return \strtr($str, $from);
  }
9060
9061
  /**
   * Return the width of a string.
   *
   * Without mbstring, the characters matched by the wide-character ranges below count as 2,
   * every remaining character counts as 1.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
  {
    if ($str === '') {
      return 0;
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isKnownEncoding) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strwidth($str, $encoding);
    }

    //
    // last resort: vanilla php, which works on UTF-8 only
    //

    if ($encoding !== 'UTF-8') {
      $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // strip all wide characters and let preg_replace() count them
    $wideCount = 0;
    $narrowOnly = (string)preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

    // wide characters take two columns, all others one
    return ($wideCount << 1) + self::strlen($narrowOnly, 'UTF-8');
  }
9111
9112
  /**
9113
   * Get part of a string.
9114
   *
9115
   * @link http://php.net/manual/en/function.mb-substr.php
9116
   *
9117
   * @param string $str       <p>The string being checked.</p>
9118
   * @param int    $offset    <p>The first position used in str.</p>
9119
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
9120
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9121
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9122
   *
9123
   * @return string|false
9124
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
9125
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
9126
   *                      characters long, <b>FALSE</b> will be returned.
9127
   */
9128 394
  public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
9129
  {
9130 394
    if ('' === $str) {
9131 19
      return '';
9132
    }
9133
9134
    // Empty string
9135 388
    if ($length === 0) {
9136 14
      return '';
9137
    }
9138
9139 385
    if ($cleanUtf8 === true) {
9140
      // iconv and mbstring are not tolerant to invalid encoding
9141
      // further, their behaviour is inconsistent with that of PHP's substr
9142 2
      $str = self::clean($str);
9143
    }
9144
9145
    // Whole string
9146 385
    if (!$offset && $length === null) {
9147 40
      return $str;
9148
    }
9149
9150 356
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
9151 157
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
9152
    }
9153
9154 356
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9155
      self::checkForSupport();
9156
    }
9157
9158
    //
9159
    // fallback for binary || ascii only
9160
    //
9161
9162
    if (
9163 356
        $encoding === 'CP850'
9164
        ||
9165 356
        $encoding === 'ASCII'
9166
    ) {
9167 5
      return self::substr_in_byte($str, $offset, $length);
9168
    }
9169
9170
    //
9171
    // fallback via mbstring
9172
    //
9173
9174 351
    if (self::$SUPPORT['mbstring'] === true) {
9175 351
      return \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
9176
    }
9177
9178
    // otherwise we need the string-length and can't fake it via "2147483647"
9179 4
    $str_length = 0;
9180 4
    if ($offset || $length === null) {
9181 4
      $str_length = self::strlen($str, $encoding);
9182
    }
9183
9184
    // e.g.: invalid chars + mbstring not installed
9185 4
    if ($str_length === false) {
9186
      return false;
9187
    }
9188
9189
    // Empty string
9190 4
    if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type null|integer is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
9191
      return '';
9192
    }
9193
9194
    // Impossible
9195 4
    if ($offset && $offset > $str_length) {
9196
      // "false" is the php native return type here,
9197
      //  but we optimized this for performance ... see "2147483647" instead of "strlen"
9198
      return '';
9199
9200
    }
9201
9202 4
    if ($length === null) {
9203 4
      $length = (int)$str_length;
9204
    } else {
9205 2
      $length = (int)$length;
9206
    }
9207
9208
    if (
9209 4
        $encoding !== 'UTF-8'
9210
        &&
9211 4
        self::$SUPPORT['mbstring'] === false
9212
    ) {
9213 2
      \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
9214
    }
9215
9216
    //
9217
    // fallback via intl
9218
    //
9219
9220
    if (
9221 4
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
9222
        &&
9223 4
        $offset >= 0 // grapheme_substr() can't handle negative offset
9224
        &&
9225 4
        self::$SUPPORT['intl'] === true
9226
    ) {
9227
      $returnTmp = \grapheme_substr($str, $offset, $length);
9228
      if ($returnTmp !== false) {
9229
        return $returnTmp;
9230
      }
9231
    }
9232
9233
    //
9234
    // fallback via iconv
9235
    //
9236
9237
    if (
9238 4
        $length >= 0 // "iconv_substr()" can't handle negative length
9239
        &&
9240 4
        self::$SUPPORT['iconv'] === true
9241
    ) {
9242
      $returnTmp = \iconv_substr($str, $offset, $length);
9243
      if ($returnTmp !== false) {
9244
        return $returnTmp;
9245
      }
9246
    }
9247
9248
    //
9249
    // fallback for ascii only
9250
    //
9251
9252 4
    if (self::is_ascii($str)) {
9253
      return \substr($str, $offset, $length);
9254
    }
9255
9256
    //
9257
    // fallback via vanilla php
9258
    //
9259
9260
    // split to array, and remove invalid characters
9261 4
    $array = self::split($str);
9262
9263
    // extract relevant part, and join to make sting again
9264 4
    return \implode('', \array_slice($array, $offset, $length));
9265
  }
9266
9267
  /**
   * Compare two strings from an offset, up to a given number of characters.
   *
   * If an offset or a length is given, $str1 is first cut via "UTF8::substr()" and
   * $str2 is cut to the same number of characters, then both parts are compared.
   *
   * @param string   $str1               <p>The main string being compared.</p>
   * @param string   $str2               <p>The secondary string being compared.</p>
   * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                     counting from the end of the string.</p>
   * @param int|null $length             [optional] <p>The length of the comparison. If null (and the offset is 0),
   *                                     both strings are compared as a whole.</p>
   * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                     insensitive.</p>
   *
   * @return int
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   */
  public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
  {
    if (
        $offset !== 0
        ||
        $length !== null
    ) {
      $str1Tmp = self::substr($str1, $offset, $length);
      $str1 = ($str1Tmp === false) ? '' : (string)$str1Tmp;

      // "self::strlen()" may return false, but "self::substr()" only accepts int|null
      // as length, so the result is converted explicitly (false becomes 0)
      $str1Length = (int)self::strlen($str1);

      $str2Tmp = self::substr($str2, 0, $str1Length);
      $str2 = ($str2Tmp === false) ? '' : (string)$str2Tmp;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
9310
9311
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack   <p>The string to search in.</p>
   * @param string   $needle     <p>The substring to search for.</p>
   * @param int      $offset     [optional] <p>The offset where to start counting.</p>
   * @param int|null $length     [optional] <p>
   *                             The maximum length after the specified offset to search for the
   *                             substring. If null, the length of the haystack (in the given
   *                             encoding) is used.
   *                             </p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false The number of occurrences, or false if the haystack or the needle is empty,
   *                   if the length of the haystack can't be determined or (before PHP 7.1) if
   *                   offset + length is not positive.
   */
  public static function substr_count(
      string $haystack,
      string $needle,
      int $offset = 0,
      int $length = null,
      string $encoding = 'UTF-8',
      bool $cleanUtf8 = false
  )
  {
    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // normalize the encoding before its first usage, so that the length calculation,
    // the sub-string extraction and the final counting all use the same encoding name
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure the haystack in the requested encoding (not in the default one)
        $lengthTmp = self::strlen($haystack, $encoding);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters from both strings before they are handed to the counting functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: every (non-overlapping) match of the quoted needle is one occurrence
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return \count($matches);
  }
9404
9405
  /**
   * Count the number of substring occurrences, processed in bytes.
   *
   * @param string   $haystack <p>
   *                           The string being checked.
   *                           </p>
   * @param string   $needle   <p>
   *                           The string being found.
   *                           </p>
   * @param int      $offset   [optional] <p>
   *                           The offset where to start counting
   *                           </p>
   * @param int|null $length   [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If null, the search runs until the end of the haystack.
   *                           </p>
   *
   * @return int|false The number of times the
   *                   needle substring occurs in the
   *                   haystack string.
   */
  public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
  {
    if ($haystack === '' || $needle === '') {
      return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        ($offset || $length !== null)
        &&
        self::$SUPPORT['mbstring_func_overload'] === true
    ) {

      // "mb_substr_count()" has no offset / length parameter, so the haystack is cut beforehand

      if ($length === null) {
        $lengthTmp = self::strlen($haystack);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      }

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      $haystackTmp = self::substr_in_byte($haystack, $offset, $length);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      // "mb_" is available if overload is used, so use it ...
      return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    // never pass "null" as length to the native function: only hand over the
    // 4th argument if the caller really provided a length
    if ($length === null) {
      return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
  }
9479
9480
  /**
   * Counts how often $substring occurs in $str.
   *
   * The search is case-sensitive by default; pass false as $caseSensitive to
   * case-fold both strings before counting.
   *
   * @param string $str           <p>The input string.</p>
   * @param string $substring     <p>The substring to search for.</p>
   * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return int The number of occurrences, 0 if one of the strings is empty.
   */
  public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
  {
    if ($str === '' || $substring === '') {
      return 0;
    }

    // BC-layer: a non-boolean value in the 3rd position is used as the encoding
    if (\is_bool($caseSensitive) === false) {
      $encoding = (string)$caseSensitive;
    }

    $foldCase = ((bool)$caseSensitive === false);
    if ($foldCase === true) {
      $str = self::strtocasefold($str, true, false, $encoding, null, false);
      $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int)$occurrences;
  }
9510
9511
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the prefix, or the unchanged haystack
   *                if it does not start with the needle.
   */
  public static function substr_ileft(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "self::strlen()" may return false, which is not a valid offset for "self::substr()";
    // without a reliable length nothing can be cut off, so the haystack stays untouched
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
9539
9540
  /**
   * Returns a part of a string, where offset and length are counted in bytes.
   *
   * @param string   $str    <p>The string being checked.</p>
   * @param int      $offset <p>The first position used in str.</p>
   * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
   *
   * @return string|false
   *                      The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters; the result of the underlying
   *                      "substr()" / "mb_substr()" call is returned as it is.
   */
  public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
  {
    // nothing to extract: empty input or an explicit length of zero
    if ($str === '' || $length === 0) {
      return '';
    }

    // no offset and no length: the whole string is requested
    if ($length === null && !$offset) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "null" is replaced by a very large length, so that everything up to the end is returned
    $byteLength = ($length === null) ? 2147483647 : $length;

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
      return \substr($str, $offset, $byteLength);
    }

    // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
    return \mb_substr($str, $offset, $byteLength, 'CP850');
  }
9579
9580
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the suffix, or the unchanged haystack
   *                if it does not end with the needle.
   */
  public static function substr_iright(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "self::strlen()" may return false; doing arithmetic with it would silently produce
    // a wrong length, so the haystack stays untouched if a length can't be determined
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
9608
9609
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the prefix, or the unchanged haystack
   *                if it does not start with the needle.
   */
  public static function substr_left(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "self::strlen()" may return false, which is not a valid offset for "self::substr()";
    // without a reliable length nothing can be cut off, so the haystack stays untouched
    $needleLength = self::strlen($needle);
    if ($needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
9637
9638
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, every element is processed on its own (recursive call) and
   * $replacement, $offset and $length are applied element by element. If $str is a single
   * string and one of the other arguments is an array, only its first element is used.
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a single string, then it will
   *                                          default to the length of the string; i.e. end the replacing at the end
   *                                          of string. If length is zero then this function will have the effect of
   *                                          inserting replacement into string at the given start offset.</p>
   *
   * @return string|string[] The result string is returned. If string is an array then array is returned.
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (\is_array($str) === true) {
      $num = \count($str);

      // the replacement
      if (\is_array($replacement) === true) {
        $replacement = \array_slice($replacement, 0, $num);
      } else {
        $replacement = \array_pad([$replacement], $num, $replacement);
      }

      // the offset
      if (\is_array($offset) === true) {
        $offset = \array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          // non-integer offsets are replaced by 0
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $offset = \array_pad([$offset], $num, $offset);
      }

      // the length
      if (null === $length) {
        // NOTE(review): a missing length becomes 0 (= insert) for every element here, while a
        // single string defaults to "replace until the end" - confirm that this is intended
        $length = \array_fill(0, $num, 0);
      } elseif (\is_array($length) === true) {
        $length = \array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (null !== $valueTmpV2) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = \array_pad([$length], $num, $length);
      }

      // recursive call
      return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
    }

    //
    // single string: arrays make no sense for the other arguments, so only the first element is used
    //

    if (\is_array($replacement) === true) {
      // "reset()" also works if the first key is not 0
      $replacement = \count($replacement) > 0 ? \reset($replacement) : '';
    }

    if (\is_array($offset) === true) {
      $offset = \count($offset) > 0 ? (int)\reset($offset) : 0;
    } elseif ($offset === null) {
      // e.g.: "array_map()" pads a too short offset-array with null
      $offset = 0;
    }

    if (\is_array($length) === true) {
      $lengthFirst = \count($length) > 0 ? \reset($length) : null;
      $length = ($lengthFirst === null) ? null : (int)$lengthFirst;
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ('' === $str) {
      return $replacement;
    }

    // for ASCII input one byte is one character, so the native function is used
    if (self::is_ascii($str)) {
      return ($length === null) ?
          \substr_replace($str, $replacement, $offset) :
          \substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters (regex with "u" modifier)
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $lengthTmp = self::strlen($str);
      if ($lengthTmp === false) {
        // e.g.: non mbstring support + invalid chars
        return '';
      }
      $length = (int)$lengthTmp;
    }

    // replace the requested range of characters and glue everything together again
    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
  }
9744
9745
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string The haystack without the suffix, or the unchanged haystack
   *                if it does not end with the needle.
   */
  public static function substr_right(string $haystack, string $needle): string
  {
    if ('' === $haystack) {
      return '';
    }

    if ('' === $needle) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "self::strlen()" may return false; doing arithmetic with it would silently produce
    // a wrong length, so the haystack stays untouched if a length can't be determined
    $haystackLength = self::strlen($haystack);
    $needleLength = self::strlen($needle);
    if ($haystackLength === false || $needleLength === false) {
      return $haystack;
    }

    $haystackTmp = self::substr($haystack, 0, (int)$haystackLength - (int)$needleLength);
    if ($haystackTmp === false) {
      return '';
    }

    return (string)$haystackTmp;
  }
9773
9774
  /**
   * Swaps the case of every character: lower-case becomes upper-case and vice versa.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string Each character's case swapped.
   */
  public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($str === '') {
      return '';
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters before the case conversion
      $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // byte-wise XOR: "lower ^ upper" is non-zero only where the case differs, and applying
    // it to the input flips exactly these bytes to the other case
    // NOTE(review): string-XOR truncates to the shortest operand, so this relies on both
    // case conversions keeping the byte length of the input
    return (string)($lower ^ $upper ^ $str);
  }
9801
9802
  /**
   * Checks whether "mbstring" or "iconv" functions are provided by a polyfill,
   * i.e. the function exists although the corresponding extension isn't loaded.
   *
   * @return bool
   *              <strong>true</strong> if "mb_strlen()" or "iconv()" exists without its extension,
   *              <strong>false</strong> otherwise.
   */
  public static function symfony_polyfill_used(): bool
  {
    $mbstringViaPolyfill = (
        \extension_loaded('mbstring') === false
        &&
        \function_exists('mb_strlen') === true
    );

    $iconvViaPolyfill = (
        \extension_loaded('iconv') === false
        &&
        \function_exists('iconv') === true
    );

    return $mbstringViaPolyfill || $iconvViaPolyfill;
  }
9825
9826
  /**
   * Replaces every tab character in the string with a fixed number of spaces.
   *
   * @param string $str       <p>The input string.</p>
   * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
   *
   * @return string The string with all tabs replaced.
   */
  public static function tabs_to_spaces(string $str, int $tabLength = 4): string
  {
    // build the replacement once: $tabLength spaces
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace("\t", $spaces, $str);
  }
9836
9837
  /**
   * Title-cases the string: the first character of each word becomes upper-case
   * and all other characters become lower-case.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
   *
   * @return string String with all characters of $str being title-cased.
   */
  public static function titlecase(string $str, string $encoding = 'UTF-8'): string
  {
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // "mb_convert_case()" is called unconditionally (always fallback via symfony polyfill)
    $titleCased = \mb_convert_case($str, MB_CASE_TITLE, $encoding);

    return $titleCased;
  }
9855
9856
  /**
   * Alias of "UTF8::to_ascii()": all arguments are passed through unchanged.
   *
   * @see        UTF8::to_ascii()
   *
   * @param string $str
   * @param string $subst_chr
   * @param bool   $strict
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
  {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
  }
9873
9874
  /**
   * Alias of "UTF8::to_iso8859()": the argument is passed through unchanged.
   *
   * @see        UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
9889
9890
  /**
   * Alias of "UTF8::to_latin1()": the argument is passed through unchanged.
   *
   * @see        UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
9905
9906
  /**
   * Alias of "UTF8::to_utf8()": the argument is passed through unchanged.
   *
   * @see        UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
9921
9922
  /**
9923
   * Convert a string into ASCII.
9924
   *
9925
   * @param string $str     <p>The input string.</p>
9926
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
9927
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
9928
   *                        performance</p>
9929
   *
9930
   * @return string
9931
   */
9932 37
  public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
9933
  {
9934 37
    static $UTF8_TO_ASCII;
9935
9936 37
    if ('' === $str) {
9937 3
      return '';
9938
    }
9939
9940
    // check if we only have ASCII, first (better performance)
9941 34
    if (self::is_ascii($str) === true) {
9942 6
      return $str;
9943
    }
9944
9945 29
    $str = self::clean(
9946 29
        $str,
9947 29
        true,
9948 29
        true,
9949 29
        true,
9950 29
        false,
9951 29
        true,
9952 29
        true
9953
    );
9954
9955
    // check again, if we only have ASCII, now ...
9956 29
    if (self::is_ascii($str) === true) {
9957 12
      return $str;
9958
    }
9959
9960 18
    if ($strict === true) {
9961
9962 1
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
9963
        self::checkForSupport();
9964
      }
9965
9966 1
      if (self::$SUPPORT['intl'] === true) {
9967
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
9968
        /** @noinspection PhpComposerExtensionStubsInspection */
9969 1
        $str = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);
9970
9971
        // check again, if we only have ASCII, now ...
9972 1
        if (self::is_ascii($str) === true) {
9973 1
          return $str;
9974
        }
9975
9976
      }
9977
    }
9978
9979 18
    if (self::$ORD === null) {
9980
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
9981
    }
9982
9983 18
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
9984 18
    $chars = $ar[0];
9985 18
    $ord = null;
9986 18
    foreach ($chars as &$c) {
9987
9988 18
      $ordC0 = self::$ORD[$c[0]];
9989
9990 18
      if ($ordC0 >= 0 && $ordC0 <= 127) {
9991 14
        continue;
9992
      }
9993
9994 18
      $ordC1 = self::$ORD[$c[1]];
9995
9996
      // ASCII - next please
9997 18
      if ($ordC0 >= 192 && $ordC0 <= 223) {
9998 16
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
9999
      }
10000
10001 18
      if ($ordC0 >= 224) {
10002 7
        $ordC2 = self::$ORD[$c[2]];
10003
10004 7
        if ($ordC0 <= 239) {
10005 6
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
10006
        }
10007
10008 7
        if ($ordC0 >= 240) {
10009 2
          $ordC3 = self::$ORD[$c[3]];
10010
10011 2
          if ($ordC0 <= 247) {
10012 2
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
10013
          }
10014
10015 2
          if ($ordC0 >= 248) {
10016
            $ordC4 = self::$ORD[$c[4]];
10017
10018
            if ($ordC0 <= 251) {
10019
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
10020
            }
10021
10022
            if ($ordC0 >= 252) {
10023
              $ordC5 = self::$ORD[$c[5]];
10024
10025
              if ($ordC0 <= 253) {
10026
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
10027
              }
10028
            }
10029
          }
10030
        }
10031
      }
10032
10033 18
      if ($ordC0 === 254 || $ordC0 === 255) {
10034
        $c = $unknown;
10035
        continue;
10036
      }
10037
10038 18
      if ($ord === null) {
10039
        $c = $unknown;
10040
        continue;
10041
      }
10042
10043 18
      $bank = $ord >> 8;
10044 18
      if (!isset($UTF8_TO_ASCII[$bank])) {
10045 9
        $UTF8_TO_ASCII[$bank] = self::getData(\sprintf('x%02x', $bank));
10046 9
        if ($UTF8_TO_ASCII[$bank] === false) {
10047 2
          $UTF8_TO_ASCII[$bank] = [];
10048
        }
10049
      }
10050
10051 18
      $newchar = $ord & 255;
10052
10053 18
      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
10054
10055
        // keep for debugging
10056
        /*
10057
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10058
        echo "char: " . $c . "\n";
10059
        echo "ord: " . $ord . "\n";
10060
        echo "newchar: " . $newchar . "\n";
10061
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
10062
        echo "bank:" . $bank . "\n\n";
10063
        */
10064
10065 17
        $c = $UTF8_TO_ASCII[$bank][$newchar];
10066
      } else {
10067
10068
        // keep for debugging missing chars
10069
        /*
10070
        echo "file: " . sprintf('x%02x', $bank) . "\n";
10071
        echo "char: " . $c . "\n";
10072
        echo "ord: " . $ord . "\n";
10073
        echo "newchar: " . $newchar . "\n";
10074
        echo "bank:" . $bank . "\n\n";
10075
        */
10076
10077 18
        $c = $unknown;
10078
      }
10079
    }
10080
10081 18
    return \implode('', $chars);
10082
  }
10083
10084
  /**
   * Interpret an arbitrary value as a boolean flag.
   *
   * The value is cast to a string first, then evaluated in this order:
   * - an empty string is false
   * - the keywords "true", "1", "on", "yes" are true and "false", "0", "off", "no"
   *   are false (matched after "strtolower()")
   * - any other numeric string is true only if it is greater than zero
   * - everything else is cast to bool after "UTF8::trim()"
   *
   * @param mixed $str <p>The value to interpret.</p>
   *
   * @return bool
   */
  public static function to_boolean($str): bool
  {
    $value = (string)$str;
    if ($value === '') {
      return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $keywords = [
        'true'  => true,
        '1'     => true,
        'on'    => true,
        'yes'   => true,
        'false' => false,
        '0'     => false,
        'off'   => false,
        'no'    => false,
    ];

    // every value in the table is a bool, so "null" can only mean "no such keyword"
    $known = $keywords[\strtolower($value)] ?? null;
    if ($known !== null) {
      return $known;
    }

    if (\is_numeric($value)) {
      return (float)$value > 0;
    }

    return (bool)self::trim($value);
  }
10123
10124
  /**
   * Convert a string (or every element of an array) into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element with their keys kept; nested arrays
   * are handled through recursion. Scalars are cast to string and passed to
   * "UTF8::utf8_decode()".
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (\is_array($str) === true) {
      // "array_map()" with a single array keeps the original keys
      return \array_map(
          static function ($element) {
            return self::to_iso8859($element);
          },
          $str
      );
    }

    $text = (string)$str;

    return $text === '' ? '' : self::utf8_decode($text);
  }
10148
10149
  /**
   * Alias of "UTF8::to_iso8859()": convert a string or an array of strings to Latin-1.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The value that is handed over unchanged to "UTF8::to_iso8859()".</p>
   *
   * @return string|string[] Whatever "UTF8::to_iso8859()" returns for the given input.
   */
  public static function to_latin1($str)
  {
    return self::to_iso8859($str);
  }
10162
10163
  /**
   * Convert a string to UTF-8 while leaving byte sequences that already look like UTF-8 untouched.
   *
   * <ul>
   * <li>Bytes that do not form a plausible UTF-8 sequence are converted one by one
   * via "UTF8::to_utf8_convert_helper()".</li>
   * <li>Unicode escape sequences ("\uXXXX") are decoded afterwards, html-entities only on request.</li>
   * <li>WARNING: Invalid UTF-8 characters are not removed, use "UTF8::clean()" if you need that.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] The UTF-8 encoded string.
   */
  public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
  {
    if (\is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;
    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $byteCount = self::strlen_in_byte($str);
    $result = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      // 0x00 - 0x7F: plain 7-bit byte, nothing to convert
      if ($lead < "\x80") {
        $result .= $lead;
        ++$pos;

        continue;
      }

      // how many continuation bytes (0x80 - 0xBF) must follow this lead byte,
      // 0 means: this byte can never start a UTF-8 sequence we accept
      if ($lead < "\xC0" || $lead > "\xF7") {
        $expected = 0;
      } elseif ($lead <= "\xDF") {
        $expected = 1;
      } elseif ($lead <= "\xEF") {
        $expected = 2;
      } else {
        $expected = 3;
      }

      // a sequence that would run past the end of the string is treated as not valid
      $sequence = $lead;
      $looksLikeUtf8 = $expected > 0 && $pos + $expected < $byteCount;
      for ($offset = 1; $looksLikeUtf8 && $offset <= $expected; ++$offset) {
        $next = $str[$pos + $offset];
        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;
        } else {
          $sequence .= $next;
        }
      }

      if ($looksLikeUtf8) {
        // almost sure it's UTF-8 already, copy the whole sequence
        $result .= $sequence;
        $pos += $expected + 1;
      } else {
        // not valid UTF-8, convert only the lead byte and re-scan what follows
        $result .= self::to_utf8_convert_helper($lead);
        ++$pos;
      }
    }

    // decode unicode escape sequences
    $result = \preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        static function ($match) {
          // always fallback via symfony polyfill
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
10272
10273
  /**
   * Convert one byte that is not part of a plausible UTF-8 sequence into UTF-8.
   *
   * The byte is first looked up in the Windows-1252 table; if there is no entry,
   * it is encoded by hand as a two-byte sequence (0xC0 | upper bits, 0x80 | lower six bits).
   *
   * The lookup tables are loaded lazily via "self::getData()" on first use.
   * NOTE(review): "self::getData()" can apparently return false; this method assumes
   * the tables are arrays - confirm against "getData()".
   *
   * @param int|string $input <p>The byte to convert (used as key of the "ord" table).</p>
   *
   * @return string
   */
  private static function to_utf8_convert_helper($input): string
  {
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
      self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return (string)self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Use an integer shift instead of "$ordC1 / 64": the division yields a float
    // (e.g. 3.125) and fractional floats as array keys are truncated implicitly,
    // which is deprecated since PHP 8.1. The resulting index is the same.
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";

    // byte-wise string operators: keep the lower six bits and set the continuation marker
    $cc2 = ((string)$input & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
10306
10307
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * Without a character list (INF, null, false or an empty string) everything that
   * matches the Unicode categories "Z" (separators) and "C" (control characters)
   * is stripped. A character list of "0" is a valid list and strips zeros.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string The trimmed string.
   */
  public static function trim(string $str = '', $chars = INF): string
  {
    if ('' === $str) {
      return '';
    }

    // "!$chars" alone would also swallow the character list "0" (PHP treats "0" as falsy),
    // so "0" is explicitly excluded from the "no list given" case.
    $useDefaultCharacters = $chars === INF
                            ||
                            (!$chars && $chars !== '0' && $chars !== 0);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultCharacters) {
      $pattern = '^[\pZ\pC]+|[\pZ\pC]+$';
    } else {
      if (\is_scalar($chars)) {
        // e.g. the integer 0 becomes the character list "0"
        $chars = (string)$chars;
      }

      $quoted = \preg_quote($chars, '/');
      $pattern = "^[{$quoted}]+|[{$quoted}]+\$";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
  }
10336
10337
  /**
   * Uppercase the first character of a string and leave the rest as it is.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string The resulting string.
   */
  public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything after the first character ("substr()" may report false here)
    $tail = self::substr($str, 1, null, $encoding);
    if ($tail === false) {
      $tail = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
10367
10368
  /**
   * Alias of "UTF8::ucfirst()": uppercase the first character of the string.
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Handed over to "UTF8::ucfirst()".</p>
   * @param bool   $cleanUtf8 [optional] <p>Handed over to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    return self::ucfirst($str, $encoding, $cleanUtf8);
  }
10383
10384
  /**
   * Uppercase the first character of every word in the string.
   *
   * For pure ASCII input without exceptions and without an extra charlist the native
   * "ucwords()" is used. Otherwise the string is split via "UTF8::str_to_words()",
   * every non-empty part that is not listed in $exceptions goes through "UTF8::ucfirst()"
   * and the parts are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
  {
    // compare against '' explicitly: "!$str" would also be true for the string "0"
    if ('' === $str) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "mb_strpos()" and "iconv_strpos()" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // explicit '' comparisons, so that a charlist / an exception of "0" is not ignored
    $usePhpDefaultFunctions = $charlist === '' && \implode('', $exceptions) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {

      // skip empty parts only, a word like "0" must stay in the result
      if ('' === (string)$word) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !\in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return \implode('', $newWords);
  }
10452
10453
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Each round runs "UTF8::to_utf8()", "UTF8::html_entity_decode()", the native
   * "urldecode()" and "UTF8::fix_simple_utf8()", in that order. With $multi_decode
   * the rounds are repeated until the string no longer changes.
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode(string $str, bool $multi_decode = true): string
  {
    if ('' === $str) {
      return '';
    }

    // rewrite "%uXXXX" sequences into hexadecimal html-entities, so that they are decoded below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
      $str = (string)\preg_replace($unicodeEscape, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = ENT_QUOTES | ENT_HTML5;

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(\urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return $str;
  }
10501
10502
  /**
10503
   * Return an array with "urlencoded"-win1252 -> UTF-8
10504
   *
10505
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
10506
   *
10507
   * @return string[]
10508
   */
10509 2
  public static function urldecode_fix_win1252_chars(): array
10510
  {
10511
    return [
10512 2
        '%20' => ' ',
10513
        '%21' => '!',
10514
        '%22' => '"',
10515
        '%23' => '#',
10516
        '%24' => '$',
10517
        '%25' => '%',
10518
        '%26' => '&',
10519
        '%27' => "'",
10520
        '%28' => '(',
10521
        '%29' => ')',
10522
        '%2A' => '*',
10523
        '%2B' => '+',
10524
        '%2C' => ',',
10525
        '%2D' => '-',
10526
        '%2E' => '.',
10527
        '%2F' => '/',
10528
        '%30' => '0',
10529
        '%31' => '1',
10530
        '%32' => '2',
10531
        '%33' => '3',
10532
        '%34' => '4',
10533
        '%35' => '5',
10534
        '%36' => '6',
10535
        '%37' => '7',
10536
        '%38' => '8',
10537
        '%39' => '9',
10538
        '%3A' => ':',
10539
        '%3B' => ';',
10540
        '%3C' => '<',
10541
        '%3D' => '=',
10542
        '%3E' => '>',
10543
        '%3F' => '?',
10544
        '%40' => '@',
10545
        '%41' => 'A',
10546
        '%42' => 'B',
10547
        '%43' => 'C',
10548
        '%44' => 'D',
10549
        '%45' => 'E',
10550
        '%46' => 'F',
10551
        '%47' => 'G',
10552
        '%48' => 'H',
10553
        '%49' => 'I',
10554
        '%4A' => 'J',
10555
        '%4B' => 'K',
10556
        '%4C' => 'L',
10557
        '%4D' => 'M',
10558
        '%4E' => 'N',
10559
        '%4F' => 'O',
10560
        '%50' => 'P',
10561
        '%51' => 'Q',
10562
        '%52' => 'R',
10563
        '%53' => 'S',
10564
        '%54' => 'T',
10565
        '%55' => 'U',
10566
        '%56' => 'V',
10567
        '%57' => 'W',
10568
        '%58' => 'X',
10569
        '%59' => 'Y',
10570
        '%5A' => 'Z',
10571
        '%5B' => '[',
10572
        '%5C' => '\\',
10573
        '%5D' => ']',
10574
        '%5E' => '^',
10575
        '%5F' => '_',
10576
        '%60' => '`',
10577
        '%61' => 'a',
10578
        '%62' => 'b',
10579
        '%63' => 'c',
10580
        '%64' => 'd',
10581
        '%65' => 'e',
10582
        '%66' => 'f',
10583
        '%67' => 'g',
10584
        '%68' => 'h',
10585
        '%69' => 'i',
10586
        '%6A' => 'j',
10587
        '%6B' => 'k',
10588
        '%6C' => 'l',
10589
        '%6D' => 'm',
10590
        '%6E' => 'n',
10591
        '%6F' => 'o',
10592
        '%70' => 'p',
10593
        '%71' => 'q',
10594
        '%72' => 'r',
10595
        '%73' => 's',
10596
        '%74' => 't',
10597
        '%75' => 'u',
10598
        '%76' => 'v',
10599
        '%77' => 'w',
10600
        '%78' => 'x',
10601
        '%79' => 'y',
10602
        '%7A' => 'z',
10603
        '%7B' => '{',
10604
        '%7C' => '|',
10605
        '%7D' => '}',
10606
        '%7E' => '~',
10607
        '%7F' => '',
10608
        '%80' => '`',
10609
        '%81' => '',
10610
        '%82' => '‚',
10611
        '%83' => 'ƒ',
10612
        '%84' => '„',
10613
        '%85' => '…',
10614
        '%86' => '†',
10615
        '%87' => '‡',
10616
        '%88' => 'ˆ',
10617
        '%89' => '‰',
10618
        '%8A' => 'Š',
10619
        '%8B' => '‹',
10620
        '%8C' => 'Œ',
10621
        '%8D' => '',
10622
        '%8E' => 'Ž',
10623
        '%8F' => '',
10624
        '%90' => '',
10625
        '%91' => '‘',
10626
        '%92' => '’',
10627
        '%93' => '“',
10628
        '%94' => '”',
10629
        '%95' => '•',
10630
        '%96' => '–',
10631
        '%97' => '—',
10632
        '%98' => '˜',
10633
        '%99' => '™',
10634
        '%9A' => 'š',
10635
        '%9B' => '›',
10636
        '%9C' => 'œ',
10637
        '%9D' => '',
10638
        '%9E' => 'ž',
10639
        '%9F' => 'Ÿ',
10640
        '%A0' => '',
10641
        '%A1' => '¡',
10642
        '%A2' => '¢',
10643
        '%A3' => '£',
10644
        '%A4' => '¤',
10645
        '%A5' => '¥',
10646
        '%A6' => '¦',
10647
        '%A7' => '§',
10648
        '%A8' => '¨',
10649
        '%A9' => '©',
10650
        '%AA' => 'ª',
10651
        '%AB' => '«',
10652
        '%AC' => '¬',
10653
        '%AD' => '',
10654
        '%AE' => '®',
10655
        '%AF' => '¯',
10656
        '%B0' => '°',
10657
        '%B1' => '±',
10658
        '%B2' => '²',
10659
        '%B3' => '³',
10660
        '%B4' => '´',
10661
        '%B5' => 'µ',
10662
        '%B6' => '¶',
10663
        '%B7' => '·',
10664
        '%B8' => '¸',
10665
        '%B9' => '¹',
10666
        '%BA' => 'º',
10667
        '%BB' => '»',
10668
        '%BC' => '¼',
10669
        '%BD' => '½',
10670
        '%BE' => '¾',
10671
        '%BF' => '¿',
10672
        '%C0' => 'À',
10673
        '%C1' => 'Á',
10674
        '%C2' => 'Â',
10675
        '%C3' => 'Ã',
10676
        '%C4' => 'Ä',
10677
        '%C5' => 'Å',
10678
        '%C6' => 'Æ',
10679
        '%C7' => 'Ç',
10680
        '%C8' => 'È',
10681
        '%C9' => 'É',
10682
        '%CA' => 'Ê',
10683
        '%CB' => 'Ë',
10684
        '%CC' => 'Ì',
10685
        '%CD' => 'Í',
10686
        '%CE' => 'Î',
10687
        '%CF' => 'Ï',
10688
        '%D0' => 'Ð',
10689
        '%D1' => 'Ñ',
10690
        '%D2' => 'Ò',
10691
        '%D3' => 'Ó',
10692
        '%D4' => 'Ô',
10693
        '%D5' => 'Õ',
10694
        '%D6' => 'Ö',
10695
        '%D7' => '×',
10696
        '%D8' => 'Ø',
10697
        '%D9' => 'Ù',
10698
        '%DA' => 'Ú',
10699
        '%DB' => 'Û',
10700
        '%DC' => 'Ü',
10701
        '%DD' => 'Ý',
10702
        '%DE' => 'Þ',
10703
        '%DF' => 'ß',
10704
        '%E0' => 'à',
10705
        '%E1' => 'á',
10706
        '%E2' => 'â',
10707
        '%E3' => 'ã',
10708
        '%E4' => 'ä',
10709
        '%E5' => 'å',
10710
        '%E6' => 'æ',
10711
        '%E7' => 'ç',
10712
        '%E8' => 'è',
10713
        '%E9' => 'é',
10714
        '%EA' => 'ê',
10715
        '%EB' => 'ë',
10716
        '%EC' => 'ì',
10717
        '%ED' => 'í',
10718
        '%EE' => 'î',
10719
        '%EF' => 'ï',
10720
        '%F0' => 'ð',
10721
        '%F1' => 'ñ',
10722
        '%F2' => 'ò',
10723
        '%F3' => 'ó',
10724
        '%F4' => 'ô',
10725
        '%F5' => 'õ',
10726
        '%F6' => 'ö',
10727
        '%F7' => '÷',
10728
        '%F8' => 'ø',
10729
        '%F9' => 'ù',
10730
        '%FA' => 'ú',
10731
        '%FB' => 'û',
10732
        '%FC' => 'ü',
10733
        '%FD' => 'ý',
10734
        '%FE' => 'þ',
10735
        '%FF' => 'ÿ',
10736
    ];
10737
  }
10738
10739
  /**
10740
   * Decodes an UTF-8 string to ISO-8859-1.
10741
   *
10742
   * @param string $str <p>The input string.</p>
10743
   * @param bool   $keepUtf8Chars
10744
   *
10745
   * @return string
10746
   */
10747 14
  public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
10748
  {
10749 14
    if ('' === $str) {
10750 5
      return '';
10751
    }
10752
10753 14
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
10754 14
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;
10755
10756 14
    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
10757
10758 1
      if (self::$WIN1252_TO_UTF8 === null) {
10759
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('win1252_to_utf8') can also be of type false. However, the property $WIN1252_TO_UTF8 is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10760
      }
10761
10762 1
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_keys() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10762
      $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10763 1
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
0 ignored issues
show
Bug introduced by
It seems like self::WIN1252_TO_UTF8 can also be of type false; however, parameter $input of array_values() does only seem to accept array, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

10763
      $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(/** @scrutinizer ignore-type */ self::$WIN1252_TO_UTF8);
Loading history...
10764
    }
10765
10766
    /** @noinspection PhpInternalEntityUsedInspection */
10767 14
    $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);
10768
10769 14
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
10770
      self::checkForSupport();
10771
    }
10772
10773
    // save for later comparision
10774 14
    $str_backup = $str;
10775 14
    $len = self::strlen_in_byte($str);
10776
10777 14
    if (self::$ORD === null) {
10778
      self::$ORD = self::getData('ord');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('ord') can also be of type false. However, the property $ORD is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10779
    }
10780
10781 14
    if (self::$CHR === null) {
10782
      self::$CHR = self::getData('chr');
0 ignored issues
show
Documentation Bug introduced by
It seems like self::getData('chr') can also be of type false. However, the property $CHR is declared as type null|array. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
10783
    }
10784
10785 14
    $noCharFound = '?';
10786
    /** @noinspection ForeachInvariantsInspection */
10787 14
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
10788 14
      switch ($str[$i] & "\xF0") {
10789 14
        case "\xC0":
10790 12
        case "\xD0":
10791 14
          $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
10792 14
          $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;
10793 14
          break;
10794
10795
        /** @noinspection PhpMissingBreakStatementInspection */
10796 12
        case "\xF0":
10797
          ++$i;
0 ignored issues
show
Coding Style Comprehensibility introduced by
Consider adding a comment if this fall-through is intended.
Loading history...
10798 12
        case "\xE0":
10799 10
          $str[$j] = $noCharFound;
10800 10
          $i += 2;
10801 10
          break;
10802
10803
        default:
10804 12
          $str[$j] = $str[$i];
10805
      }
10806
    }
10807
10808 14
    $return = self::substr_in_byte($str, 0, $j);
10809 14
    if ($return === false) {
0 ignored issues
show
introduced by
The condition $return === false is always false.
Loading history...
10810
      $return = '';
10811
    }
10812
10813
    if (
10814 14
        $keepUtf8Chars === true
10815
        &&
10816 14
        self::strlen($return) >= self::strlen($str_backup)
10817
    ) {
10818 2
      return $str_backup;
10819
    }
10820
10821 14
    return $return;
10822
  }
10823
10824
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * <p>After the plain conversion, every key of the "win1252_to_utf8" data table that occurs
   * in the result is replaced by its mapped value. If that table cannot be loaded, the plain
   * conversion result is returned unchanged.</p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the input is empty or the conversion failed.</p>
   */
  public static function utf8_encode(string $str): string
  {
    if ('' === $str) {
      return '';
    }

    $str = \utf8_encode($str);

    // the polyfill may return false
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if ($str === false) {
      return '';
    }

    // without a 0xC2 byte the replacement step is skipped
    // (presumably every key of the mapping table contains that byte -- verify against the data file)
    if (false === \strpos($str, "\xC2")) {
      return $str;
    }

    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;

    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {

      if (self::$WIN1252_TO_UTF8 === null) {
        // getData() can return false; only store a real array so the
        // "null|array" property is never poisoned with false
        $win1252ToUtf8 = self::getData('win1252_to_utf8');
        if (\is_array($win1252ToUtf8)) {
          self::$WIN1252_TO_UTF8 = $win1252ToUtf8;
        }
      }

      // no mapping table available: skip the replacement instead of
      // passing a non-array to array_keys() / array_values()
      if (!\is_array(self::$WIN1252_TO_UTF8)) {
        return $str;
      }

      $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
      $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    return \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $str);
  }
10864
10865
  /**
   * Fixes utf8-win1252 chars by delegating to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The value returned by "UTF8::fix_simple_utf8()" for the given string.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars(string $str): string
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
10878
10879
  /**
   * Gives access to the table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return string[]
   *                 The whitespace characters as values, keyed by the type of whitespace
   *                 as defined in the URL above.
   */
  public static function whitespace_table(): array
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
10894
10895
  /**
   * Cuts a string down to a maximum number of words.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words to keep.</p>
   * @param string $strAddOn <p>Suffix that is appended when the string was shortened.</p>
   *
   * @return string <p>
   *                An empty string for an empty input or a limit below 1; the unchanged input
   *                if nothing matched or nothing had to be cut off; otherwise the right-trimmed
   *                leading words followed by $strAddOn.
   *                </p>
   */
  public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
  {
    if ('' === $str || $limit < 1) {
      return '';
    }

    // leading whitespace, then between 1 and $limit runs of "word + trailing whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $found);

    $head = $found[0] ?? null;
    if ($head === null) {
      return $str;
    }

    // the match already spans the whole string, so there is nothing to cut off
    if (self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
10926
10927
  /**
   * Wraps a string to a given number of characters.
   *
   * <p>The input is translated into a placeholder mask with one single-byte symbol per
   * character, the native wordwrap() runs on that mask, and the resulting break positions
   * are mapped back onto the original characters.</p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column; an empty string if $str or $break is empty.</p>
   */
  public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
  {
    if ('' === $str || '' === $break) {
      return '';
    }

    $segments = \explode($break, $str);
    if ($segments === false) {
      $segments = [];
    }

    // $chars holds one entry per character (an existing $break counts as one entry),
    // $mask holds exactly one byte per entry: '#' = break, ' ' = space, '?' = anything else
    $chars = [];
    $mask = '';
    foreach ($segments as $index => $segment) {
      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= (' ' === $char) ? ' ' : '?';
      }
    }

    $wrappedMask = \wordwrap($mask, $width, '#', $cut);

    $result = '';
    $charIndex = 0;
    $maskPos = -1;
    $breakPos = -1;

    while (false !== ($breakPos = self::strpos($wrappedMask, '#', $breakPos + 1))) {
      // copy every character located in front of the current break marker
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $result .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the marker stands for an original break or a replaced space: drop that entry
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $result .= $break;
    }

    // everything behind the last break marker is appended unchanged
    return $result . \implode('', $chars);
  }
10997
10998
  /**
   * Line-Wrap the string after $limit, but also after the next word.
   *
   * <p>The input is split at "\r\n", "\r" and "\n"; every resulting line is wrapped on its own
   * via the UTF-8 aware "UTF8::wordwrap()" and terminated with "\n", so a non-empty result
   * always ends with a line-feed.</p>
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $limit <p>The column width passed to "UTF8::wordwrap()" for each line.</p>
   *
   * @return string <p>The wrapped lines, or an empty string if the input could not be split.</p>
   */
  public static function wordwrap_per_line(string $str, int $limit): string
  {
    $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
    if ($lines === false) {
      return '';
    }

    $wrapped = '';
    foreach ($lines as $line) {
      // use the class method: the global wordwrap() measures the width in bytes,
      // which wraps multibyte text too early
      $wrapped .= self::wordwrap($line, $limit);
      $wrapped .= "\n";
    }

    return $wrapped;
  }
11022
11023
  /**
   * Gives access to the list of Unicode White Space characters.
   *
   * @return string[] <p>The numeric code point as key and the White Space Character as value.</p>
   */
  public static function ws(): array
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
11032
11033
11034
}
11035