Completed
Push — master ( cff60c...e9ec3c )
by Lars
04:33
created

UTF8   D

Complexity

Total Complexity 868

Size/Duplication

Total Lines 7110
Duplicated Lines 10.76 %

Coupling/Cohesion

Components 2
Dependencies 3

Test Coverage

Coverage 85.33%

Importance

Changes 0
Metric Value
wmc 868
lcom 2
cbo 3
dl 765
loc 7110
ccs 1570
cts 1840
cp 0.8533
rs 4.4102
c 0
b 0
f 0

166 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A access() 0 4 1
A add_bom_to_string() 0 8 2
A binary_to_str() 0 4 1
A bom() 0 4 1
A callback() 0 4 1
A checkForSupport() 0 22 2
C chr() 0 49 10
A chr_map() 0 6 1
A chr_size_list() 0 8 2
B chr_to_decimal() 0 32 6
A chr_to_hex() 0 4 1
A chunk_split() 0 4 1
B clean() 0 35 4
A cleanup() 0 20 2
B codepoints() 0 26 3
A count_chars() 0 4 1
A decimal_to_chr() 0 8 1
C encode() 9 77 20
B file_get_contents() 0 35 6
A file_has_bom() 0 4 1
C filter() 11 52 13
A filter_input() 10 10 2
A filter_input_array() 10 10 2
A filter_var() 10 10 2
A filter_var_array() 10 10 2
A fits_inside() 0 4 1
A fix_simple_utf8() 19 19 3
B fix_utf8() 0 24 4
D getCharDirection() 0 112 119
A getData() 0 10 2
A hasBom() 0 4 1
A hex_to_int() 0 12 3
A html_decode() 0 4 1
B html_encode() 0 38 5
C html_entity_decode() 0 65 12
B htmlentities() 0 28 6
A htmlspecialchars() 0 8 2
A iconv_loaded() 0 15 3
A int_to_hex() 0 12 3
A intlChar_loaded() 0 4 2
A intl_loaded() 0 4 2
A isAscii() 0 4 1
A isBase64() 0 4 1
A isBinary() 0 4 1
A isBom() 0 4 1
A isHtml() 0 4 1
A isJson() 0 4 1
A isUtf16() 0 4 1
A isUtf32() 0 4 1
A isUtf8() 0 4 1
A is_ascii() 0 10 2
A is_base64() 0 14 3
B is_binary() 0 17 5
A is_binary_file() 0 12 2
A is_bom() 0 10 3
A is_html() 0 19 3
A is_json() 0 18 4
C is_utf16() 48 48 12
C is_utf32() 48 48 12
D is_utf8() 21 134 25
A json_decode() 0 12 2
A json_encode() 0 12 2
A lcfirst() 0 4 1
A ltrim() 15 15 4
A max() 8 8 2
A max_chr_width() 0 9 2
A mbstring_loaded() 0 10 3
A min() 8 8 2
A normalizeEncoding() 0 4 1
B normalize_encoding() 0 49 6
A normalize_msword() 19 19 3
B normalize_whitespace() 0 36 6
B number_format() 0 25 3
C ord() 0 47 15
A parse_str() 0 13 4
A pcre_utf8_support() 0 5 1
D range() 14 38 9
B rawurldecode() 31 31 6
A removeBOM() 0 4 1
A remove_bom() 0 10 3
A remove_duplicates() 0 15 4
A remove_invisible_characters() 0 20 3
B replace_diamond_question_mark() 0 30 3
A rtrim() 15 15 4
C rxClass() 0 40 8
A showSupport() 0 10 3
B single_chr_html_encode() 0 23 5
C split() 12 77 23
C str_detect_encoding() 0 82 11
A str_ends_with() 15 15 3
A str_iends_with() 15 15 3
A str_ireplace() 0 18 3
A str_istarts_with() 15 15 3
B str_limit_after_word() 0 31 5
C str_pad() 9 41 7
A str_repeat() 0 6 1
A str_replace() 0 4 1
A str_shuffle() 0 8 1
A str_sort() 0 16 3
B str_split() 0 36 6
A str_starts_with() 15 15 3
A str_to_binary() 0 8 1
A str_to_words() 0 12 2
A str_transliterate() 0 4 1
B str_word_count() 0 30 5
A strcasecmp() 0 4 1
A strchr() 0 4 1
A strcmp() 0 8 2
B strcspn() 0 22 6
A strichr() 0 4 1
A string() 0 13 1
A string_has_bom() 0 10 3
A strip_tags() 0 8 2
D stripos() 9 43 10
C stristr() 7 52 11
C strlen() 16 68 17
A strnatcasecmp() 0 4 1
A strnatcmp() 0 4 2
A strncasecmp() 0 4 1
A strncmp() 0 7 1
A strpbrk() 0 15 3
F strpos() 22 89 20
A strrchr() 15 16 3
A strrev() 0 10 2
A strrichr() 15 15 3
C strripos() 32 63 15
F strrpos() 38 77 19
B strspn() 0 17 5
D strstr() 7 58 13
B strtocasefold() 0 37 6
A strtolower() 21 21 4
A strtonatfold() 0 5 1
A strtoupper() 20 20 4
A strtr() 0 19 4
A strwidth() 0 15 3
F substr() 16 76 18
A substr_compare() 0 7 2
C substr_count() 7 56 12
A substr_ileft() 19 20 4
A substr_iright() 19 20 4
A substr_left() 19 20 4
C substr_replace() 20 73 15
A substr_right() 19 19 4
B swapCase() 0 34 5
A toAscii() 0 4 1
A toIso8859() 0 4 1
A toLatin1() 0 4 1
A toUTF8() 0 4 1
F to_ascii() 6 116 27
B to_iso8859() 0 22 4
A to_latin1() 0 4 1
D to_utf8() 20 102 27
A trim() 0 15 4
A ucfirst() 0 4 1
A ucword() 0 4 1
C ucwords() 0 38 8
B urldecode() 31 31 6
B urldecode_fix_win1252_chars() 0 231 1
A utf8_decode() 0 22 3
B utf8_encode() 0 26 4
A utf8_fix_win1252_chars() 0 4 1
A whitespace_table() 0 4 1
B words_limit() 0 26 5
C wordwrap() 0 51 10
A ws() 0 4 1

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems, and corresponding solutions are:

Complex Class

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This can often reduce the size of classes significantly.

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
   * Bom => Byte-Length
   *
   * Every byte order mark is listed twice: once as its raw byte sequence and
   * once in the form it takes after the raw bytes were mis-read as
   * "WINDOWS-1252" and stored as UTF-8 (each BOM byte >= 0x80 then occupies
   * two bytes, which is why those entries are longer).
   *
   * The mis-decoded variants are written as byte escapes on purpose: literal
   * characters such as "ï»¿" are easily damaged or dropped by editors and
   * viewers, which silently changes the key.
   *
   * The UTF-32 entries are listed before the UTF-16 entries because the
   * UTF-16 (LE) BOM is a prefix of the UTF-32 (LE) BOM.
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $bom = array(
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char may have more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      // NOTE(review): the two leading bytes are assumed to be NUL (as in the real BOM) - confirm against the upstream file
      "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      // NOTE(review): the two trailing bytes are assumed to be NUL (as in the real BOM) - confirm against the upstream file
      "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
100
101
  /**
102
   * Numeric code point => UTF-8 Character
103
   *
104
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
105
   *
106
   * @var array
107
   */
108
  private static $whitespace = array(
109
    // NUL Byte
110
    0     => "\x0",
111
    // Tab
112
    9     => "\x9",
113
    // New Line
114
    10    => "\xa",
115
    // Vertical Tab
116
    11    => "\xb",
117
    // Carriage Return
118
    13    => "\xd",
119
    // Ordinary Space
120
    32    => "\x20",
121
    // NO-BREAK SPACE
122
    160   => "\xc2\xa0",
123
    // OGHAM SPACE MARK
124
    5760  => "\xe1\x9a\x80",
125
    // MONGOLIAN VOWEL SEPARATOR
126
    6158  => "\xe1\xa0\x8e",
127
    // EN QUAD
128
    8192  => "\xe2\x80\x80",
129
    // EM QUAD
130
    8193  => "\xe2\x80\x81",
131
    // EN SPACE
132
    8194  => "\xe2\x80\x82",
133
    // EM SPACE
134
    8195  => "\xe2\x80\x83",
135
    // THREE-PER-EM SPACE
136
    8196  => "\xe2\x80\x84",
137
    // FOUR-PER-EM SPACE
138
    8197  => "\xe2\x80\x85",
139
    // SIX-PER-EM SPACE
140
    8198  => "\xe2\x80\x86",
141
    // FIGURE SPACE
142
    8199  => "\xe2\x80\x87",
143
    // PUNCTUATION SPACE
144
    8200  => "\xe2\x80\x88",
145
    // THIN SPACE
146
    8201  => "\xe2\x80\x89",
147
    //HAIR SPACE
148
    8202  => "\xe2\x80\x8a",
149
    // LINE SEPARATOR
150
    8232  => "\xe2\x80\xa8",
151
    // PARAGRAPH SEPARATOR
152
    8233  => "\xe2\x80\xa9",
153
    // NARROW NO-BREAK SPACE
154
    8239  => "\xe2\x80\xaf",
155
    // MEDIUM MATHEMATICAL SPACE
156
    8287  => "\xe2\x81\x9f",
157
    // IDEOGRAPHIC SPACE
158
    12288 => "\xe3\x80\x80",
159
  );
160
161
  /**
162
   * @var array
163
   */
164
  private static $whitespaceTable = array(
165
      'SPACE'                     => "\x20",
166
      'NO-BREAK SPACE'            => "\xc2\xa0",
167
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
168
      'EN QUAD'                   => "\xe2\x80\x80",
169
      'EM QUAD'                   => "\xe2\x80\x81",
170
      'EN SPACE'                  => "\xe2\x80\x82",
171
      'EM SPACE'                  => "\xe2\x80\x83",
172
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
173
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
174
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
175
      'FIGURE SPACE'              => "\xe2\x80\x87",
176
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
177
      'THIN SPACE'                => "\xe2\x80\x89",
178
      'HAIR SPACE'                => "\xe2\x80\x8a",
179
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
180
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
181
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
182
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
183
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
184
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
185
  );
186
187
  /**
188
   * bidirectional text chars
189
   *
190
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
191
   *
192
   * @var array
193
   */
194
  private static $bidiUniCodeControlsTable = array(
195
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
196
    8234 => "\xE2\x80\xAA",
197
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
198
    8235 => "\xE2\x80\xAB",
199
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
200
    8236 => "\xE2\x80\xAC",
201
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
202
    8237 => "\xE2\x80\xAD",
203
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
204
    8238 => "\xE2\x80\xAE",
205
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
206
    8294 => "\xE2\x81\xA6",
207
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
208
    8295 => "\xE2\x81\xA7",
209
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
210
    8296 => "\xE2\x81\xA8",
211
    // POP DIRECTIONAL ISOLATE
212
    8297 => "\xE2\x81\xA9",
213
  );
214
215
  /**
216
   * @var array
217
   */
218
  private static $commonCaseFold = array(
219
      'ſ'            => 's',
220
      "\xCD\x85"     => 'ι',
221
      'ς'            => 'σ',
222
      "\xCF\x90"     => 'β',
223
      "\xCF\x91"     => 'θ',
224
      "\xCF\x95"     => 'φ',
225
      "\xCF\x96"     => 'π',
226
      "\xCF\xB0"     => 'κ',
227
      "\xCF\xB1"     => 'ρ',
228
      "\xCF\xB5"     => 'ε',
229
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
230
      "\xE1\xBE\xBE" => 'ι',
231
  );
232
233
  /**
234
   * @var array
235
   */
236
  private static $brokenUtf8ToUtf8 = array(
237
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
238
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
239
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
240
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
241
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
242
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
243
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
244
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
245
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
246
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
247
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
248
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
249
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
250
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
251
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
252
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
253
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
254
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
255
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
256
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
257
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
258
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
259
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
260
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
261
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
262
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
263
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
264
      'ü'       => 'ü',
265
      'ä'       => 'ä',
266
      'ö'       => 'ö',
267
      'Ö'       => 'Ö',
268
      'ß'       => 'ß',
269
      'Ã '       => 'à',
270
      'á'       => 'á',
271
      'â'       => 'â',
272
      'ã'       => 'ã',
273
      'ù'       => 'ù',
274
      'ú'       => 'ú',
275
      'û'       => 'û',
276
      'Ù'       => 'Ù',
277
      'Ú'       => 'Ú',
278
      'Û'       => 'Û',
279
      'Ü'       => 'Ü',
280
      'ò'       => 'ò',
281
      'ó'       => 'ó',
282
      'ô'       => 'ô',
283
      'è'       => 'è',
284
      'é'       => 'é',
285
      'ê'       => 'ê',
286
      'ë'       => 'ë',
287
      'À'       => 'À',
288
      'Á'       => 'Á',
289
      'Â'       => 'Â',
290
      'Ã'       => 'Ã',
291
      'Ä'       => 'Ä',
292
      'Ã…'       => 'Å',
293
      'Ç'       => 'Ç',
294
      'È'       => 'È',
295
      'É'       => 'É',
296
      'Ê'       => 'Ê',
297
      'Ë'       => 'Ë',
298
      'ÃŒ'       => 'Ì',
299
      'Í'       => 'Í',
300
      'ÃŽ'       => 'Î',
301
      'Ï'       => 'Ï',
302
      'Ñ'       => 'Ñ',
303
      'Ã’'       => 'Ò',
304
      'Ó'       => 'Ó',
305
      'Ô'       => 'Ô',
306
      'Õ'       => 'Õ',
307
      'Ø'       => 'Ø',
308
      'Ã¥'       => 'å',
309
      'æ'       => 'æ',
310
      'ç'       => 'ç',
311
      'ì'       => 'ì',
312
      'í'       => 'í',
313
      'î'       => 'î',
314
      'ï'       => 'ï',
315
      'ð'       => 'ð',
316
      'ñ'       => 'ñ',
317
      'õ'       => 'õ',
318
      'ø'       => 'ø',
319
      'ý'       => 'ý',
320
      'ÿ'       => 'ÿ',
321
      '€'      => '€',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $utf8ToWin1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $utf8MSWord = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $iconvEncoding = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $support = array();
808
809 1
  /**
   * Constructor.
   *
   * Triggers the environment detection in UTF8::checkForSupport(), so that
   * creating an instance has the same effect as calling that method directly.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
816
817
  /**
   * Return the character at the specified position: $str[1] like functionality.
   *
   * This is a thin wrapper that asks UTF8::substr() for exactly one character
   * starting at $pos.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character.</p>
   */
  public static function access($str, $pos)
  {
    return self::substr($str, $pos, 1);
  }
829
830
  /**
   * Puts the UTF-8 BOM in front of the string and returns the result.
   *
   * INFO: If the string already has a BOM, the input string is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // anything but an explicit "false" from the check leaves the input untouched
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
847
848
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read as one big-endian number: they are left-padded with "0" up to a whole
   * number of octets and every octet becomes one byte of the result. The conversion works
   * octet by octet, so the input length is not limited by integer / float precision and
   * leading zero octets are kept.
   *
   * @param mixed $bin <p>Binary digits, e.g. "0011000100110010" for "12".</p>
   *
   * @return string <p>The decoded bytes, or an empty string for empty input.</p>
   */
  public static function binary_to_str($bin)
  {
    $bits = (string)$bin;
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8 so that the last digit stays the least significant bit
    $missing = (8 - (strlen($bits) % 8)) % 8;
    if ($missing > 0) {
      $bits = str_repeat('0', $missing) . $bits;
    }

    $bytes = '';
    foreach (str_split($bits, 8) as $octet) {
      $bytes .= chr(bindec($octet));
    }

    return $bytes;
  }
859
860 2
  /**
   * Gives back the three bytes of the UTF-8 Byte Order Mark.
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $byteOrderMark = "\xEF\xBB\xBF";

    return $byteOrderMark;
  }
869
870
  /**
   * @alias of UTF8::chr_map()
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback, handed over to UTF8::chr_map() as it is.</p>
   * @param string       $str      <p>The string, handed over to UTF8::chr_map() as it is.</p>
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
884 2
885
  /**
   * Detects once which UTF-8 related features the server environment offers
   * and stores the result in self::$support.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection already ran, nothing to do
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    // the marker is set before the individual checks are executed
    self::$support['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$support['mbstring'] = self::mbstring_loaded();

    // http://php.net/manual/en/book.iconv.php
    self::$support['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$support['intl'] = self::intl_loaded();

    // http://php.net/manual/en/class.intlchar.php
    self::$support['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
912
913
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Only real integers inside the Unicode range (0 - 0x10FFFF) are accepted; everything else
   * yields null, no matter whether "IntlChar" is available or the manual fallback is used.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure to encode.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // numeric strings, floats etc. are rejected, only integers are valid code points
    if (!is_int($code_point)) {
      return null;
    }

    // out-of-range values must not wrap around or end up as a raw byte (e.g. -1 => "\xFF")
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    } elseif (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    // the separator keeps "code point + encoding" pairs apart when the encoding name starts with digits
    $cacheKey = $code_point . '|' . $encoding;
    if (isset($cache[$cacheKey]) === true) {
      return $cache[$cacheKey];
    }

    if ($code_point < 0x80) {
      // 0xxxxxxx
      $str = chr($code_point);
    } elseif ($code_point < 0x800) {
      // 110xxxxx 10xxxxxx
      $str = chr(0xC0 | ($code_point >> 6)) .
             chr(0x80 | ($code_point & 0x3F));
    } elseif ($code_point < 0x10000) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      $str = chr(0xE0 | ($code_point >> 12)) .
             chr(0x80 | (($code_point >> 6) & 0x3F)) .
             chr(0x80 | ($code_point & 0x3F));
    } else {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = chr(0xF0 | ($code_point >> 18)) .
             chr(0x80 | (($code_point >> 12) & 0x3F)) .
             chr(0x80 | (($code_point >> 6) & 0x3F)) .
             chr(0x80 | ($code_point & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $cache[$cacheKey] = $str;

    return $str;
  }
972
973
  /**
   * Runs a callback on every character of a string.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, $str)
  {
    // split first, then hand the pieces over to the callback one by one
    return array_map($callback, self::split($str));
  }
987
988 4
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character, empty for empty input.</p>
   */
  public static function chr_size_list($str)
  {
    // "0" is falsy in PHP, but it is a real one-character string and must not be dropped
    if (!$str && $str !== '0') {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1008
1009 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are decoded (1 - 4); only the first character of
   * the input is taken into account.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The code point, 0 for empty input.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // nothing to decode; avoids reading an undefined string offset
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // a truncated sequence contributes zero bits for every missing continuation byte
      // instead of reading an undefined string offset
      $byte = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
1048
1049
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>The prefix that is handed over to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    // character => integer code point => prefixed hex notation
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
1061
1062
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending character(s).
   *
   * INFO: The line ending is placed between the chunks, it is not appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1075
1076
  /**
   * Strips everything that does not look like a UTF-8 byte sequence from a string and,
   * on request, applies some extra normalizations.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings
    //
    // group 1 collects runs of well-formed sequences, groups 2 + 3 match single stray bytes;
    // replacing every match with "$1" keeps the runs and drops the stray bytes
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = preg_replace($utf8Pattern, '$1', $str);

    // these two steps always run
    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    // the optional steps only run for a strict boolean "true"
    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
1124
1125
  /**
   * Repairs simple encoding errors first and cleans the string afterwards.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($input);

    // remove all none UTF-8 symbols
    // && remove diamond question mark (�)
    // && remove invisible characters (e.g. "\0")
    // && remove BOM
    // && normalize whitespace chars (but keep non-breaking-spaces)
    $removeBom = true;
    $normalizeWhitespace = true;
    $normalizeMsWord = false;
    $keepNonBreakingSpace = true;

    return (string)self::clean($fixed, $removeBom, $normalizeWhitespace, $normalizeMsWord, $keepNonBreakingSpace);
  }
1152 5
1153
  /**
   * Turns a string, or an array of strings, into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a string is split into characters, everything else is mapped as it is
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $toCodePoint = array(
        '\\voku\\helper\\UTF8',
        'ord',
    );
    $points = array_map($toCodePoint, $chars);

    if (!$u_style) {
      return $points;
    }

    $toHex = array(
        '\\voku\\helper\\UTF8',
        'int_to_hex',
    );

    return array_map($toHex, $points);
  }
1190
1191
  /**
   * Counts how often every character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // one array entry per character, then count the duplicates
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1204
1205
  /**
   * Builds the UTF-8 character that belongs to a decimal code.
   *
   * @param int $code <p>The decimal code of the character.</p>
   *
   * @return string
   */
  public static function decimal_to_chr($code)
  {
    // the code is written as a hexadecimal HTML entity and decoded by mbstring
    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
1220
1221 11
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The input is returned unchanged if it is empty, if no encoding is given, if the current
   * encoding cannot be detected, if ($force is not true and) the string already has the
   * requested encoding, or if the conversion itself yields no result.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // the detection yields false|string: without a usable encoding name there is nothing to convert from
    if (!is_string($encodingDetected) || $encodingDetected === '') {
      return $str;
    }

    // without "force" a string that already has the target encoding is left alone
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || $encodingDetected === 'UTF-8'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || $encodingDetected === 'ISO-8859-1'
            || $encodingDetected === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    // only a warning: the conversion below is attempted anyway
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // a plain truthiness check would throw away the valid result "0",
    // so only a failed (false) or empty conversion falls back to the input
    if (is_string($strEncoded) && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
1311
1312
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read. The name is passed through
   *                                     filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   *                                     </p>
   * @param int|null      $flags         [optional] <p>
   *                                     Prior to PHP 6, this parameter is called
   *                                     use_include_path and is a bool.
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
   *                                     to trigger include path search.
   *                                     The value is handed over to the native function as a boolean,
   *                                     null means "do not search the include path".
   *                                     </p>
   *                                     <p>
   *                                     The value of flags can be any combination of
   *                                     the following flags (with some restrictions), joined with the
   *                                     binary OR (|) operator.
   *                                     </p>
   *                                     <ul>
   *                                     <li>
   *                                     FILE_USE_INCLUDE_PATH: Search for filename in the include directory.
   *                                     See include_path for more information.
   *                                     </li>
   *                                     <li>
   *                                     FILE_TEXT: As of PHP 6, the default encoding of the read
   *                                     data is UTF-8. You can specify a different encoding by creating a
   *                                     custom context or by changing the default using
   *                                     stream_default_encoding. This flag cannot be
   *                                     used with FILE_BINARY.
   *                                     </li>
   *                                     <li>
   *                                     FILE_BINARY: With this flag, the file is read in binary mode. This is the
   *                                     default setting and cannot be used with FILE_TEXT.
   *                                     </li>
   *                                     </ul>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with
   *                                     stream_context_create. If you don't need to use a
   *                                     custom context, you can skip this parameter by null.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts, null means the start of the file.
   *                                     </p>
   * @param int|null      $maxlen        [optional] <p>
   *                                     Maximum length of data read. The default is to read until end
   *                                     of file is reached.
   *                                     </p>
   * @param int           $timeout       <p>
   *                                     The time in seconds for the timeout. It is only applied (as "http"
   *                                     stream context option) if no $context is given.
   *                                     </p>
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                     or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;

    // NOTE(review): FILTER_SANITIZE_STRING strips tags / encodes quotes and is deprecated as of PHP 8.1,
    // it is kept here because callers may rely on the sanitized name
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);
    if ($filename === false) {
      // the name could not be sanitized (e.g. not a scalar), so there is nothing to read
      return false;
    }

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // the native function expects a bool and an int here, null is not a valid value for them
    $useIncludePath = (bool)$flags;
    $startOffset = $offset === null ? 0 : $offset;

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $startOffset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $startOffset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1429 2
1430 2
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also if the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // an unreadable file has no BOM; do not hand "false" over to the string check
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1441
1442 8
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (objects are changed in place),
   * strings are normalized and every other type is returned as it is.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form that is handed over to the "Normalizer", NFC by default.</p>
   * @param string $leading_combining  <p>Put in front of a string that starts with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // ASCII-only strings need no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker: the string is fine as it is
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // fall back to a re-encoding if the normalization gave no result
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars
    // for NFC-safe concatenations.
    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);
    if ($startsWithCombiningMark) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
1503
1504
  /**
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name and optionally filters it
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a fourth argument
    $var = func_num_args() < 4
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1544 1
1545 1
  /**
1546
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1547
   *
1548 1
   * Gets external variables and optionally filters them
1549
   *
1550
   * @link  http://php.net/manual/en/function.filter-input-array.php
1551
   *
1552
   * @param int   $type       <p>
1553
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1554
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1555
   *                          <b>INPUT_ENV</b>.
1556
   *                          </p>
1557
   * @param mixed $definition [optional] <p>
1558
   *                          An array defining the arguments. A valid key is a string
1559 1
   *                          containing a variable name and a valid value is either a filter type, or an array
1560
   *                          optionally specifying the filter, flags and options. If the value is an
1561 1
   *                          array, valid keys are filter which specifies the
1562
   *                          filter type,
1563
   *                          flags which specifies any flags that apply to the
1564
   *                          filter, and options which specifies any options that
1565
   *                          apply to the filter. See the example below for a better understanding.
1566
   *                          </p>
1567
   *                          <p>
1568
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1569
   *                          input array are filtered by this filter.
1570
   *                          </p>
1571
   * @param bool  $add_empty  [optional] <p>
1572
   *                          Add missing keys as <b>NULL</b> to the return value.
1573
   *                          </p>
1574
   *
1575
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1576
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1577 7
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1578
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1579 7
   * fails.
1580 7
   * @since 5.2.0
1581
   */
1582 7 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // Forward only the arguments the caller actually supplied, so the native
    // function falls back to its own defaults instead of our placeholders.
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    // Post-process the native result with UTF8::filter().
    return self::filter($values);
  }
1592
1593 7
  /**
1594
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1595
   *
1596
   * Filters a variable with a specified filter
1597
   *
1598
   * @link  http://php.net/manual/en/function.filter-var.php
1599
   *
1600
   * @param mixed $variable <p>
1601
   *                        Value to filter.
1602
   *                        </p>
1603 1
   * @param int   $filter   [optional] <p>
1604
   *                        The ID of the filter to apply. The
1605 1
   *                        manual page lists the available filters.
1606
   *                        </p>
1607 1
   * @param mixed $options  [optional] <p>
1608
   *                        Associative array of options or bitwise disjunction of flags. If filter
1609
   *                        accepts options, flags can be provided in "flags" field of array. For
1610 1
   *                        the "callback" filter, callable type should be passed. The
1611 1
   *                        callback must accept one argument, the value to be filtered, and return
1612
   *                        the value after filtering/sanitizing it.
1613 1
   *                        </p>
1614
   *                        <p>
1615
   *                        <code>
1616 1
   *                        // for filters that accept options, use this format
1617 1
   *                        $options = array(
1618 1
   *                        'options' => array(
1619 1
   *                        'default' => 3, // value to return if the filter fails
1620 1
   *                        // other options here
1621
   *                        'min_range' => 0
1622 1
   *                        ),
1623
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1624
   *                        );
1625
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1626
   *                        // for filter that only accept flags, you can pass them directly
1627
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1628
   *                        // for filter that only accept flags, you can also pass as an array
1629
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1630
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1631
   *                        // callback validate filter
1632 1
   *                        function foo($value)
1633
   *                        {
1634 1
   *                        // Expected format: Surname, GivenNames
1635
   *                        if (strpos($value, ", ") === false) return false;
1636
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1637
   *                        $empty = (empty($surname) || empty($givennames));
1638 1
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1639
   *                        if ($empty || $notstrings) {
1640
   *                        return false;
1641
   *                        } else {
1642
   *                        return $value;
1643
   *                        }
1644
   *                        }
1645
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1646
   *                        </code>
1647
   *                        </p>
1648
   *
1649
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1650
   * @since 5.2.0
1651
   */
1652 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Only hand $options to the native function when the caller passed it.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // Post-process the native result with UTF8::filter().
    return self::filter($filtered);
  }
1662 1
1663 1
  /**
1664 1
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1665 1
   *
1666 1
   * Gets multiple variables and optionally filters them
1667 1
   *
1668 1
   * @link  http://php.net/manual/en/function.filter-var-array.php
1669 1
   *
1670 1
   * @param array $data       <p>
1671 1
   *                          An array with string keys containing the data to filter.
1672 1
   *                          </p>
1673
   * @param mixed $definition [optional] <p>
1674
   *                          An array defining the arguments. A valid key is a string
1675
   *                          containing a variable name and a valid value is either a
1676
   *                          filter type, or an
1677
   *                          array optionally specifying the filter, flags and options.
1678
   *                          If the value is an array, valid keys are filter
1679
   *                          which specifies the filter type,
1680
   *                          flags which specifies any flags that apply to the
1681
   *                          filter, and options which specifies any options that
1682
   *                          apply to the filter. See the example below for a better understanding.
1683
   *                          </p>
1684
   *                          <p>
1685
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1686
   *                          input array are filtered by this filter.
1687
   *                          </p>
1688
   * @param bool  $add_empty  [optional] <p>
1689
   *                          Add missing keys as <b>NULL</b> to the return value.
1690
   *                          </p>
1691
   *
1692 1
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1693 1
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1694
   * the variable is not set.
1695
   * @since 5.2.0
1696
   */
1697 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // Forward only the arguments the caller actually supplied, so the native
    // function falls back to its own defaults instead of our placeholders.
    $values = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // Post-process the native result with UTF8::filter().
    return self::filter($values);
  }
1707
1708
  /**
   * Check whether a string has at most the given number of characters.
   *
   * The length is taken from UTF8::strlen().
   *
   * @param string $str      The string to measure.
   * @param int    $box_size The maximum number of characters allowed.
   *
   * @return bool true if the length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1720
1721
  /**
   * Repair simple cases of broken UTF-8 by plain string substitution.
   *
   * Every key of self::$brokenUtf8ToUtf8 found in the input is replaced by its
   * mapped value via str_replace().
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Intended for UTF-8 strings that were converted from Windows-1252 as if they
   * were ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The repaired string, or '' for empty input.</p>
   */
  public static function fix_simple_utf8($str)
  {
    // Search/replace lists are derived once and reused on later calls.
    static $replacementTable = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    if ($replacementTable === null) {
      $replacementTable = array(
          'search'  => array_keys(self::$brokenUtf8ToUtf8),
          'replace' => array_values(self::$brokenUtf8ToUtf8),
      );
    }

    return str_replace($replacementTable['search'], $replacementTable['replace'], $input);
  }
1753
1754 1
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * A string is passed through utf8_decode() followed by to_utf8() repeatedly
   * until the result no longer changes. Arrays are processed element by element
   * (recursively), keeping their keys.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return mixed <p>The fixed string, or an array of fixed values for array input.</p>
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    $current = $str;
    $previous = '';

    // Keep decoding until one more round trip leaves the value untouched.
    while ($previous !== $current) {
      $previous = $current;
      $current = self::to_utf8(self::utf8_decode($previous));
    }

    return $current;
  }
1785
1786 1
  /**
   * Get the text direction of a specific character.
   *
   * When self::$support['intlChar'] is true, the answer comes from
   * \IntlChar::charDirection(), provided the reported direction class is one of
   * the values listed below. In every other case the decision is made from a
   * built-in table of right-to-left code points.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      // direction classes as reported by the "IntlChar"-Class
      $direction = \IntlChar::charDirection($char);

      if (in_array($direction, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($direction, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // any other direction class falls through to the table lookup below
    }

    $code = static::chr_to_decimal($char);

    // Everything outside the overall span covered by the tables is left-to-right.
    if (!(0x5be <= $code && $code <= 0x10b7f)) {
      return 'LTR';
    }

    if ($code <= 0x85e) {

      $singles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif ($code === 0x200f) {

      return 'RTL';

    } elseif ($code >= 0xfb1d) {

      $singles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two tables, only 0x200f is right-to-left
      return 'LTR';

    }

    // Single code points are matched strictly, ranges by inclusive bounds.
    if (in_array($code, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $code && $range[1] >= $code) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1905
1906
  /**
   * Load a data file from "/data/*.php" (relative to this file's directory).
   *
   * The file is pulled in with "require", so the returned value is whatever
   * that PHP file returns.
   *
   * @param string $file <p>File name without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Will return false if the file does not exist.</p>
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1923
1924
  /**
   * Alias of "UTF8::string_has_bom()"; the call is forwarded unchanged.
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated use "UTF8::string_has_bom()" instead
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1939 9
1940 9
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with
   * "\u" or "U+" (case-insensitive), e.g. "U+00a7", "\u00a7" or "00a7".
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param string $str <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int($str)
  {
    if (!$str) {
      return false;
    }

    // Only real hex digits (0-9, a-f) are accepted: a wider letter class would
    // let input such as "zzzz" through and intval() would turn it into 0
    // instead of reporting failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
1961
1962
  /**
   * Alias of "UTF8::html_entity_decode()"; all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1977 9
1978
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * Uses mb_encode_numericentity() when that function exists; otherwise every
   * character from UTF8::split() is encoded with UTF8::single_chr_html_encode().
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities, or '' for empty input.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // Start above the ASCII range when ASCII chars should stay as they are.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The map is a list of 4-tuples (start, end, offset, mask). It must hold
      // a multiple of four elements, otherwise PHP 8 rejects it with a ValueError.
      // NOTE(review): the range ends at 0xfffff, so code points above that are
      // not encoded on this path - confirm this is intended.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2027
2028
  /**
2029
   * UTF-8 version of html_entity_decode()
2030
   *
2031
   * The reason we are not using html_entity_decode() by itself is because
2032
   * while it is not technically correct to leave out the semicolon
2033
   * at the end of an entity most browsers will still interpret the entity
2034
   * correctly. html_entity_decode() does not convert entities without
2035
   * semicolons, so we are left with our own little solution here. Bummer.
2036
   *
2037
   * Convert all HTML entities to their applicable characters
2038
   *
2039
   * INFO: opposite to UTF8::html_encode()
2040
   *
2041
   * @link http://php.net/manual/en/function.html-entity-decode.php
2042
   *
2043
   * @param string $str      <p>
2044
   *                         The input string.
2045
   *                         </p>
2046
   * @param int    $flags    [optional] <p>
2047
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2048
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2049
   *                         <table>
2050
   *                         Available <i>flags</i> constants
2051
   *                         <tr valign="top">
2052
   *                         <td>Constant Name</td>
2053
   *                         <td>Description</td>
2054
   *                         </tr>
2055
   *                         <tr valign="top">
2056
   *                         <td><b>ENT_COMPAT</b></td>
2057
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2058
   *                         </tr>
2059
   *                         <tr valign="top">
2060
   *                         <td><b>ENT_QUOTES</b></td>
2061
   *                         <td>Will convert both double and single quotes.</td>
2062
   *                         </tr>
2063
   *                         <tr valign="top">
2064
   *                         <td><b>ENT_NOQUOTES</b></td>
2065
   *                         <td>Will leave both double and single quotes unconverted.</td>
2066
   *                         </tr>
2067
   *                         <tr valign="top">
2068
   *                         <td><b>ENT_HTML401</b></td>
2069
   *                         <td>
2070
   *                         Handle code as HTML 4.01.
2071
   *                         </td>
2072
   *                         </tr>
2073
   *                         <tr valign="top">
2074
   *                         <td><b>ENT_XML1</b></td>
2075
   *                         <td>
2076
   *                         Handle code as XML 1.
2077
   *                         </td>
2078
   *                         </tr>
2079
   *                         <tr valign="top">
2080
   *                         <td><b>ENT_XHTML</b></td>
2081
   *                         <td>
2082
   *                         Handle code as XHTML.
2083
   *                         </td>
2084
   *                         </tr>
2085
   *                         <tr valign="top">
2086
   *                         <td><b>ENT_HTML5</b></td>
2087
   *                         <td>
2088
   *                         Handle code as HTML 5.
2089
   *                         </td>
2090
   *                         </tr>
2091
   *                         </table>
2092
   *                         </p>
2093
   * @param string $encoding [optional] <p>Encoding to use.</p>
2094 2
   *
2095
   * @return string <p>The decoded string.</p>
2096 2
   */
2097 1
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    // Fewer than four bytes cannot hold anything this method decodes
    // (examples: "&;" or "&x;"); this also hands back '' for empty input.
    if (!isset($str[3])) {
      return $str;
    }

    // Nothing to decode without an ampersand, or when the string has neither
    // a numeric entity marker nor a terminating semicolon.
    $hasAmpersand = strpos($str, '&') !== false;
    $hasNumericOrTerminator = strpos($str, '&#') !== false || strpos($str, ';') !== false;

    if (!$hasAmpersand || !$hasNumericOrTerminator) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Decodes one decimal entity, but hands quote characters back still
    // encoded so that the html_entity_decode() call below treats them
    // according to $flags.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      return ($decoded === '"' || $decoded === "'") ? $matches[0] : $decoded;
    };

    // Repeat until a full pass changes nothing, so nested encodings are unwrapped.
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // append the missing semicolon to numeric entities, then
      // decode numeric & UTF16 two byte entities
      $terminated = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );

      $str = html_entity_decode($terminated, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2162
2163
  /**
2164
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2165
   *
2166
   * @link http://php.net/manual/en/function.htmlentities.php
2167
   *
2168
   * @param string $str           <p>
2169
   *                              The input string.
2170
   *                              </p>
2171
   * @param int    $flags         [optional] <p>
2172
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2173
   *                              invalid code unit sequences and the used document type. The default is
2174
   *                              ENT_COMPAT | ENT_HTML401.
2175
   *                              <table>
2176
   *                              Available <i>flags</i> constants
2177
   *                              <tr valign="top">
2178
   *                              <td>Constant Name</td>
2179
   *                              <td>Description</td>
2180
   *                              </tr>
2181
   *                              <tr valign="top">
2182
   *                              <td><b>ENT_COMPAT</b></td>
2183
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2184
   *                              </tr>
2185
   *                              <tr valign="top">
2186
   *                              <td><b>ENT_QUOTES</b></td>
2187
   *                              <td>Will convert both double and single quotes.</td>
2188
   *                              </tr>
2189
   *                              <tr valign="top">
2190
   *                              <td><b>ENT_NOQUOTES</b></td>
2191
   *                              <td>Will leave both double and single quotes unconverted.</td>
2192
   *                              </tr>
2193
   *                              <tr valign="top">
2194
   *                              <td><b>ENT_IGNORE</b></td>
2195
   *                              <td>
2196
   *                              Silently discard invalid code unit sequences instead of returning
2197
   *                              an empty string. Using this flag is discouraged as it
2198
   *                              may have security implications.
2199
   *                              </td>
2200
   *                              </tr>
2201
   *                              <tr valign="top">
2202
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2203
   *                              <td>
2204
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2205
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2206
   *                              </td>
2207
   *                              </tr>
2208
   *                              <tr valign="top">
2209
   *                              <td><b>ENT_DISALLOWED</b></td>
2210
   *                              <td>
2211
   *                              Replace invalid code points for the given document type with a
2212
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2213
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2214
   *                              instance, to ensure the well-formedness of XML documents with
2215
   *                              embedded external content.
2216
   *                              </td>
2217
   *                              </tr>
2218
   *                              <tr valign="top">
2219
   *                              <td><b>ENT_HTML401</b></td>
2220
   *                              <td>
2221
   *                              Handle code as HTML 4.01.
2222
   *                              </td>
2223
   *                              </tr>
2224
   *                              <tr valign="top">
2225
   *                              <td><b>ENT_XML1</b></td>
2226
   *                              <td>
2227
   *                              Handle code as XML 1.
2228
   *                              </td>
2229
   *                              </tr>
2230
   *                              <tr valign="top">
2231
   *                              <td><b>ENT_XHTML</b></td>
2232 1
   *                              <td>
2233
   *                              Handle code as XHTML.
2234 1
   *                              </td>
2235
   *                              </tr>
2236
   *                              <tr valign="top">
2237
   *                              <td><b>ENT_HTML5</b></td>
2238 1
   *                              <td>
2239
   *                              Handle code as HTML 5.
2240
   *                              </td>
2241
   *                              </tr>
2242
   *                              </table>
2243
   *                              </p>
2244
   * @param string $encoding      [optional] <p>
2245
   *                              Like <b>htmlspecialchars</b>,
2246 1
   *                              <b>htmlentities</b> takes an optional third argument
2247
   *                              <i>encoding</i> which defines encoding used in
2248 1
   *                              conversion.
2249
   *                              Although this argument is technically optional, you are highly
2250
   *                              encouraged to specify the correct value for your code.
2251
   *                              </p>
2252
   * @param bool   $double_encode [optional] <p>
2253
   *                              When <i>double_encode</i> is turned off PHP will not
2254
   *                              encode existing html entities. The default is to convert everything.
2255
   *                              </p>
2256
   *
2257
   *
2258
   * @return string the encoded string.
2259
   * </p>
2260
   * <p>
2261 3
   * If the input <i>string</i> contains an invalid code unit
2262
   * sequence within the given <i>encoding</i> an empty string
2263 3
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2264 3
   * <b>ENT_SUBSTITUTE</b> flags are set.
2265
   */
2266 3
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // The extra numeric-entity pass below is only applied to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct character of three or more bytes that the native
    // call left in place and map it to its numbered entity.
    $search = array();
    $replacements = array();

    foreach (self::chr_size_list($str) as $counter => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $counter);

      // access() can signal failure with false; skip it explicitly instead of
      // relying on the false value turning into a no-op replacement.
      if ($char === false || isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
2294
2295
  /**
2296
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2297
   *
2298
   * INFO: Take a look at "UTF8::htmlentities()"
2299
   *
2300
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2301
   *
2302
   * @param string $str           <p>
2303 2
   *                              The string being converted.
2304
   *                              </p>
2305 2
   * @param int    $flags         [optional] <p>
2306
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2307
   *                              invalid code unit sequences and the used document type. The default is
2308
   *                              ENT_COMPAT | ENT_HTML401.
2309
   *                              <table>
2310
   *                              Available <i>flags</i> constants
2311
   *                              <tr valign="top">
2312
   *                              <td>Constant Name</td>
2313
   *                              <td>Description</td>
2314
   *                              </tr>
2315
   *                              <tr valign="top">
2316
   *                              <td><b>ENT_COMPAT</b></td>
2317 1
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2318
   *                              </tr>
2319 1
   *                              <tr valign="top">
2320
   *                              <td><b>ENT_QUOTES</b></td>
2321
   *                              <td>Will convert both double and single quotes.</td>
2322
   *                              </tr>
2323
   *                              <tr valign="top">
2324
   *                              <td><b>ENT_NOQUOTES</b></td>
2325
   *                              <td>Will leave both double and single quotes unconverted.</td>
2326
   *                              </tr>
2327
   *                              <tr valign="top">
2328
   *                              <td><b>ENT_IGNORE</b></td>
2329
   *                              <td>
2330
   *                              Silently discard invalid code unit sequences instead of returning
2331
   *                              an empty string. Using this flag is discouraged as it
2332
   *                              may have security implications.
2333
   *                              </td>
2334
   *                              </tr>
2335
   *                              <tr valign="top">
2336
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2337
   *                              <td>
2338
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2339
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2340
   *                              </td>
2341
   *                              </tr>
2342
   *                              <tr valign="top">
2343
   *                              <td><b>ENT_DISALLOWED</b></td>
2344
   *                              <td>
2345
   *                              Replace invalid code points for the given document type with a
2346
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2347
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2348
   *                              instance, to ensure the well-formedness of XML documents with
2349
   *                              embedded external content.
2350
   *                              </td>
2351
   *                              </tr>
2352
   *                              <tr valign="top">
2353
   *                              <td><b>ENT_HTML401</b></td>
2354
   *                              <td>
2355
   *                              Handle code as HTML 4.01.
2356
   *                              </td>
2357
   *                              </tr>
2358
   *                              <tr valign="top">
2359 1
   *                              <td><b>ENT_XML1</b></td>
2360
   *                              <td>
2361 1
   *                              Handle code as XML 1.
2362
   *                              </td>
2363
   *                              </tr>
2364
   *                              <tr valign="top">
2365
   *                              <td><b>ENT_XHTML</b></td>
2366
   *                              <td>
2367
   *                              Handle code as XHTML.
2368
   *                              </td>
2369
   *                              </tr>
2370
   *                              <tr valign="top">
2371
   *                              <td><b>ENT_HTML5</b></td>
2372
   *                              <td>
2373
   *                              Handle code as HTML 5.
2374
   *                              </td>
2375
   *                              </tr>
2376
   *                              </table>
2377
   *                              </p>
2378
   * @param string $encoding      [optional] <p>
2379
   *                              Defines encoding used in conversion.
2380
   *                              </p>
2381
   *                              <p>
2382
   *                              For the purposes of this function, the encodings
2383
   *                              ISO-8859-1, ISO-8859-15,
2384
   *                              UTF-8, cp866,
2385
   *                              cp1251, cp1252, and
2386
   *                              KOI8-R are effectively equivalent, provided the
2387 1
   *                              <i>string</i> itself is valid for the encoding, as
2388
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2389 1
   *                              the same positions in all of these encodings.
2390
   *                              </p>
2391
   * @param bool   $double_encode [optional] <p>
2392
   *                              When <i>double_encode</i> is turned off PHP will not
2393
   *                              encode existing html entities, the default is to convert everything.
2394
   *                              </p>
2395
   *
2396
   * @return string The converted string.
2397
   * </p>
2398
   * <p>
2399
   * If the input <i>string</i> contains an invalid code unit
2400
   * sequence within the given <i>encoding</i> an empty string
2401 1
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2402
   * <b>ENT_SUBSTITUTE</b> flags are set.
2403 1
   */
2404
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // 'UTF-8' is passed on untouched; any other label goes through normalize_encoding().
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2412
2413
  /**
   * Checks whether iconv is available on the server.
   *
   * Side effect: when the extension is loaded and Bootup::is_php('5.6') is not truthy,
   * iconv's input, output and internal encodings are set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only touch the iconv API when the extension really exists; calling
    // iconv_set_encoding() without it would abort with an undefined-function error.
    if ($loaded === true && !Bootup::is_php('5.6')) {
      // INFO: "iconv_set_encoding" is deprecated since PHP 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2433 28
2434
  /**
   * Converts an integer into its hexadecimal code point notation (e.g. "U+0041").
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>Non-negative integer (or string of decimal digits) to convert.</p>
   * @param string $pfix [optional] <p>Prefix put in front of the hex digits.</p>
   *
   * @return string <p>Prefix plus at least four lowercase hex digits, or an empty string
   *                if the input does not consist of decimal digits only.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // anything that is not a plain run of decimal digits (negatives, text, '') is rejected
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // left-pad short values with zeros to four digits; longer values stay as they are
    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
2456
2457 1
  /**
   * Checks whether the "IntlChar" class can be used on this server.
   *
   * @return bool <p><strong>true</strong> only if Bootup::is_php('7.0') reports true
   *              and the class "IntlChar" exists, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    // version gate first; the class lookup is skipped when it fails
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2466
2467
  /**
   * Checks whether the "intl" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if loaded, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    $isLoaded = extension_loaded('intl');

    return $isLoaded === true;
  }
2476
2477 16
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_ascii()".
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str <p>Passed on unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_ascii()".</p>
   *
   * @deprecated
   */
  public static function isAscii($str)
  {
    return self::is_ascii($str);
  }
2492
2493
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_base64()".
   *
   * @see UTF8::is_base64()
   *
   * @param string $str <p>Passed on unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_base64()".</p>
   *
   * @deprecated
   */
  public static function isBase64($str)
  {
    return self::is_base64($str);
  }
2508
2509
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_binary()".
   *
   * @see UTF8::is_binary()
   *
   * @param string $str <p>Passed on unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_binary()".</p>
   *
   * @deprecated
   */
  public static function isBinary($str)
  {
    return self::is_binary($str);
  }
2524
2525
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_bom()".
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr <p>Passed on unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_bom()".</p>
   *
   * @deprecated
   */
  public static function isBom($utf8_chr)
  {
    return self::is_bom($utf8_chr);
  }
2540 1
2541
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_html()".
   *
   * @see UTF8::is_html()
   *
   * @param string $str <p>Passed on unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_html()".</p>
   *
   * @deprecated
   */
  public static function isHtml($str)
  {
    return self::is_html($str);
  }
2556
2557
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_json()".
   *
   * @see UTF8::is_json()
   *
   * @param string $str <p>Passed on unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_json()".</p>
   *
   * @deprecated
   */
  public static function isJson($str)
  {
    return self::is_json($str);
  }
2572 1
2573 1
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf16()".
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str <p>Passed on unchanged.</p>
   *
   * @return int|false <p><strong>false</strong> if it is not UTF-16,
   *                   <strong>1</strong> for UTF-16LE, <strong>2</strong> for UTF-16BE.</p>
   *
   * @deprecated
   */
  public static function isUtf16($str)
  {
    return self::is_utf16($str);
  }
2588
2589
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf32()".
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str <p>Passed on unchanged.</p>
   *
   * @return int|false <p><strong>false</strong> if it is not UTF-32,
   *                   <strong>1</strong> for UTF-32LE, <strong>2</strong> for UTF-32BE.</p>
   *
   * @deprecated
   */
  public static function isUtf32($str)
  {
    return self::is_utf32($str);
  }
2604 4
2605 4
  /**
   * Deprecated camelCase alias that forwards to "UTF8::is_utf8()".
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    <p>Passed on unchanged.</p>
   * @param bool   $strict <p>Passed on unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_utf8()".</p>
   *
   * @deprecated
   */
  public static function isUtf8($str, $strict = false)
  {
    return self::is_utf8($str, $strict);
  }
2621 4
2622 4
  /**
   * Tells whether a string consists of 7-bit ASCII bytes only.
   *
   * @param string $str <p>The string to inspect (cast to string first).</p>
   *
   * @return bool <p>
   *              <strong>true</strong> for an empty string or when no byte in the
   *              range 0x80-0xFF occurs<br />
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // nothing in it, so nothing outside of ASCII either
    if ($str === '') {
      return true;
    }

    // a single high-bit byte is enough to disqualify the string
    return preg_match('/[\x80-\xFF]/', $str) ? false : true;
  }
2642
2643
  /**
   * Tells whether a string is base64 encoded.
   *
   * The check is a round trip: the string must strictly decode and the re-encoded
   * result must be identical to the input.
   *
   * @param string $str <p>The input string (cast to string first).</p>
   *
   * @return bool <p><strong>false</strong> for an empty string or when the round trip
   *              does not reproduce $str, <strong>true</strong> otherwise.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = base64_decode($str, true);

    // strict decoding failed, so it cannot be base64
    if ($decoded === false) {
      return false;
    }

    return base64_encode($decoded) === $str;
  }
2664 3
2665 3
  /**
   * Heuristically checks whether the input looks like binary data.
   *
   * The input counts as binary when it is a bit string (only the characters "0" and "1")
   * or when it contains at least one NUL byte.
   *
   * The former third test compared the number of occurrences of the literal substring
   * '^ -~' against 30% of the length. A four-byte substring can cover at most 25% of a
   * string, so that test could never succeed and has been removed.
   *
   * @param mixed $input <p>Scalar or object with __toString(); any other type yields false.</p>
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    // only values with a well-defined string form can be inspected
    if (!is_scalar($input) && !(is_object($input) && method_exists($input, '__toString'))) {
      return false;
    }

    $input = (string)$input;

    if ($input === '') {
      return false;
    }

    // bit string such as "0101"
    if (preg_match('~^[01]+$~', $input) === 1) {
      return true;
    }

    // a NUL byte anywhere
    return strpos($input, "\x00") !== false;
  }
2689 3
2690 1
  /**
   * Checks whether a file looks binary, judged by (at most) its first 512 bytes.
   *
   * The bytes read are handed to "UTF8::is_binary()". A file that cannot be opened
   * or read is treated like an empty one.
   *
   * @param string $file <p>Path of the file to inspect.</p>
   *
   * @return bool
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // reached only if an error handler turns warnings into exceptions
      $block = '';
    }

    // release the handle on every path, including after an exception
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2709
2710
  /**
   * Checks whether the given string is identical to one of the known "Byte Order Marks".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is strictly equal to a key of self::$bom,
   *              <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // the BOM byte strings are the keys of self::$bom
    $knownBoms = array_keys(self::$bom);

    return in_array($str, $knownBoms, true);
  }
2729
2730 41
  /**
   * Checks whether the string contains at least one html-tag.
   *
   * @param string $str <p>The input string (cast to string first).</p>
   *
   * @return bool <p><strong>true</strong> if an opening, closing or self-closing tag
   *              is found, <strong>false</strong> otherwise (also for an empty string).</p>
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // "<", optional "/", tag name, optional attributes, optional "/", ">"
    $tagPattern = "/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/";

    return preg_match($tagPattern, $str) === 1;
  }
2756
2757 34
  /**
   * Tries to detect whether "$str" is a json-string.
   *
   * Only input that "UTF8::json_decode()" turns into an object is accepted, so
   * anything decoding to a non-object value is reported as false.
   *
   * @param string $str <p>The input string (cast to string first).</p>
   *
   * @return bool
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    if (!is_object($decoded)) {
      return false;
    }

    // an object alone is not enough, the decoder must also report no error
    return json_last_error() === JSON_ERROR_NONE;
  }
2782 3
2783 3
  /**
   * Checks whether the string is UTF-16 and, if so, which byte order it uses.
   *
   * Works only on input that "UTF8::is_binary()" accepts (after BOM removal). For both
   * byte orders the string is converted to UTF-8 and round-tripped; a byte order
   * collects points when the round trip is stable. The byte order with more points wins.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16 (or the result is a tie),<br />
   *                   <strong>1</strong> for UTF-16LE,<br />
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array_keys($scores) as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // UTF-8 -> UTF-16 -> UTF-8 must reproduce the first conversion
      $backToUtf16 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf16, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): the keys of count_chars($roundTrip) are looked up among the
      // *values* of count_chars($str) - confirm this is intended.
      $sourceChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $candidate => $ignored) {
        if (in_array($candidate, $sourceChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    // a tie (including 0:0) gives no verdict
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
2842
2843 20
  /**
   * Checks whether the string is UTF-32 and, if so, which byte order it uses.
   *
   * Works only on input that "UTF8::is_binary()" accepts (after BOM removal). For both
   * byte orders the string is converted to UTF-8 and round-tripped; a byte order
   * collects points when the round trip is stable. The byte order with more points wins.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32 (or the result is a tie),<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array_keys($scores) as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // UTF-8 -> UTF-32 -> UTF-8 must reproduce the first conversion
      $backToUtf32 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf32, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): the keys of count_chars($roundTrip) are looked up among the
      // *values* of count_chars($str) - confirm this is intended.
      $sourceChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $candidate => $ignored) {
        if (in_array($candidate, $sourceChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    // a tie (including 0:0) gives no verdict
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
2902
2903
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * An empty string is considered valid. Over-long encodings, surrogates, code points
   * above U+10FFFF, 5/6-octet sequences, stray continuation bytes and sequences that
   * are cut off (also at the very end of the string) make the string invalid.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this shortcut is taken when pcre_utf8_support() is NOT true, which
    // looks inverted judging by the helper's name - confirm against its definition.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // expected number of continuation octets still to come
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the lead octet of a sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence: always illegal (not the shortest form
          // or outside of 0-0x10FFFF). No resynchronization is attempted; the check
          // at the end of the sequence rejects it.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a sequence: only a continuation octet (10xxxxxx) is acceptable.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // End of the sequence: $mUcs4 holds the final code point, validate it.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // reset for the next character
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state means the string ended in the middle of a multi-octet
    // sequence (e.g. "abc\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
3047
3048
  /**
3049
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3050
   * Decodes a JSON string
3051
   *
3052
   * @link http://php.net/manual/en/function.json-decode.php
3053
   *
3054
   * @param string $json    <p>
3055
   *                        The <i>json</i> string being decoded.
3056
   *                        </p>
3057
   *                        <p>
3058 1
   *                        This function only works with UTF-8 encoded strings.
3059
   *                        </p>
3060 1
   *                        <p>PHP implements a superset of
3061
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3062
   *                        only supports these values when they are nested inside an array or an object.
3063
   *                        </p>
3064
   * @param bool   $assoc   [optional] <p>
3065
   *                        When <b>TRUE</b>, returned objects will be converted into
3066
   *                        associative arrays.
3067
   *                        </p>
3068
   * @param int    $depth   [optional] <p>
3069
   *                        User specified recursion depth.
3070 16
   *                        </p>
3071
   * @param int    $options [optional] <p>
3072 16
   *                        Bitmask of JSON decode options. Currently only
3073
   *                        <b>JSON_BIGINT_AS_STRING</b>
3074 16
   *                        is supported (default is to cast large integers as floats)
3075 2
   *                        </p>
3076
   *
3077
   * @return mixed the value encoded in <i>json</i> in appropriate
3078 16
   * PHP type. Values true, false and
3079 1
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3080
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3081
   * <i>json</i> cannot be decoded or if the encoded
3082 16
   * data is deeper than the recursion limit.
3083 4
   */
3084
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // the input goes through self::filter() before it is decoded
    $filtered = self::filter($json);

    // $options is handed to the native decoder only when Bootup::is_php('5.4') is true
    if (Bootup::is_php('5.4') === true) {
      return json_decode($filtered, $assoc, $depth, $options);
    }

    return json_decode($filtered, $assoc, $depth);
  }
3096 4
3097 4
  /**
   * Returns the JSON representation of a value (wrapper around the native "json_encode()").
   *
   * INFO: the value is passed through UTF8::filter() before it is encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>, <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only handed to the native function if Bootup::is_php('5.5') is truthy.
   *                       </p>
   *
   * @return string|false <p>A JSON encoded string on success or <strong>false</strong> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // older PHP versions: call the native function without the "$depth" argument
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3145
3146
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string, an empty string for empty input.</p>
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // UTF8::substr() can return false, so both parts are cast back to string
    // before they are lower-cased / concatenated
    $firstChar = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $rest;
  }
3157 18
3158
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>
   *                      Characters to be stripped. If omitted (or empty), the Unicode
   *                      categories "Z" (separators) and "C" (control chars) are stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is a valid character to strip, so it must not be mistaken for "no characters given"
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass((string)$chars) . '+/u', '', $str);
  }
3181
3182
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the highest code point,
   *                an empty string if the input contains no characters.
   *                </p>
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // the native max() fails on an empty array (warning / ValueError),
    // so empty input is answered directly
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3197
3198
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if UTF8::chr_size_list() yields no entries.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3215
3216
  /**
   * Checks whether mbstring is available on the server.
   *
   * INFO: if the extension is loaded, the internal encoding of mbstring is set to "UTF-8" as a side effect.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3231
3232 17
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the lowest code point,
   *                an empty string if the input contains no characters.
   *                </p>
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // the native min() fails on an empty array (warning / ValueError),
    // so empty input is answered directly
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3247
3248 16
  /**
   * Alias of "UTF8::normalize_encoding()", all arguments are forwarded unchanged.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   *
   * @return string <p>The result of UTF8::normalize_encoding().</p>
   *
   * @deprecated <p>Use UTF8::normalize_encoding() instead.</p>
   */
  public static function normalizeEncoding($encoding)
  {
    return self::normalize_encoding($encoding);
  }
3263
3264
  /**
   * Normalize the encoding-"name" input.
   *
   * Known aliases (compared upper-cased and without punctuation) are mapped to a canonical
   * name, unknown names are returned upper-cased. Results are cached per input name.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>
   *                      e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br />
   *                      <strong>false</strong> if the given encoding is empty.
   *                      </p>
   */
  public static function normalize_encoding($encoding)
  {
    static $cache = array();

    if (!$encoding) {
      return false;
    }

    // already normalized: nothing to do
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($cache[$encoding])) {
      return $cache[$encoding];
    }

    // alias (upper-case, stripped of everything except letters, digits and whitespace) => canonical name
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $upper = strtoupper($encoding);
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    // fall back to the upper-cased input, if there is no known alias
    $normalized = isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upper;

    $cache[$encoding] = $normalized;

    return $normalized;
  }
3320 1
3321
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "self::$utf8MSWord" table is replaced by its value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string <p>The normalized string, an empty string for empty input.</p>
   */
  public static function normalize_msword($str)
  {
    $text = (string)$str;

    if (!isset($text[0])) {
      return '';
    }

    // the search / replace lists are built once and reused by later calls
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replace, $text);
  }
3347 1
3348
  /**
   * Normalize the whitespace.
   *
   * Every entry of the "self::$whitespaceTable" is replaced by a simple space and (by default)
   * every entry of the "self::$bidiUniCodeControlsTable" is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string <p>The normalized string, an empty string for empty input.</p>
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // Only a strict "true" keeps the non-breaking space. The cache key is derived from the
    // very same test, otherwise a truthy non-bool value (e.g. 1) would fill the cache slot
    // of "true" with a table that still contains the non-breaking space.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $WHITESPACE_CACHE[$cacheKey] = self::$whitespaceTable;

      if ($keepNbsp === true) {
        /** @noinspection OffsetOperationsInspection */
        unset($WHITESPACE_CACHE[$cacheKey]['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($WHITESPACE_CACHE[$cacheKey]);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3394
3395
  /**
   * Format a number with grouped thousands.
   *
   * INFO: separators that are longer than one byte (e.g. UTF-8 characters) are supported.
   *
   * @param float  $number        <p>The number being formatted.</p>
   * @param int    $decimals      [optional] <p>The number of decimal points.</p>
   * @param string $dec_point     [optional] <p>The separator for the decimal point.</p>
   * @param string $thousands_sep [optional] <p>The thousands separator.</p>
   *
   * @return string <p>The formatted number.</p>
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // The native function of older PHP versions (< 5.4) uses only the first byte of each
    // separator, so a multi-byte separator is inserted afterwards: format with the
    // single-byte placeholders "." and "," and swap both in one pass. strtr() replaces
    // simultaneously, so a "," inside of the decimal point is not replaced a second time.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3432 45
3433 45
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>
   *                              Encoding of the given character, default is UTF-8.
   *                              An empty value (e.g. null) is handled like UTF-8.
   *                              </p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 for empty input.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // "0" is falsy, but it is a valid character
    if (!$chr && $chr !== '0') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);

      // normalize_encoding() returns false for an empty encoding name: there is nothing
      // to convert from, so the input is handled as UTF-8 instead of passing "false"
      // as source encoding to mbstring
      if ($encoding) {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $tmpReturn = \IntlChar::ord($chr);
      // a falsy result falls through to the manual decoding below
      if ($tmpReturn) {
        return $tmpReturn;
      }
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    if (isset($cache[$chr]) === true) {
      return $cache[$chr];
    }

    // only the first four bytes are relevant, the result is a 1-based array of byte values
    $bytes = unpack('C*', substr($chr, 0, 4));
    $code = $bytes ? $bytes[1] : 0;

    // four byte sequence
    if (0xF0 <= $code && isset($bytes[4])) {
      return $cache[$chr] = (($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three byte sequence
    if (0xE0 <= $code && isset($bytes[3])) {
      return $cache[$chr] = (($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two byte sequence
    if (0xC0 <= $code && isset($bytes[2])) {
      return $cache[$chr] = (($code - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte: the byte value is returned as it is
    return $cache[$chr] = $code;
  }
3493
3494
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never sets variables in the current scope,
   *          the result is only written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() before parsing.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or the $result is empty.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    $parsed = \mb_parse_str($input, $result);

    return $parsed !== false && !empty($result);
  }
3521 2
3522 2
  /**
   * Checks if the "/u" pattern modifier is available that enables Unicode support in PCRE.
   *
   * The check runs an empty pattern with the "u" modifier, errors are silenced on purpose.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3532 18
3533 18
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved in this order: a string of digits is used as decimal code point,
   * a string of hex digits is converted via UTF8::hex_to_int(), everything else via UTF8::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve both boundaries (start first, then end) into code points
    $codePoints = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $codePoints[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($codePoints[0], $codePoints[1])
    );
  }
3579
3580
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string <p>The decoded string, an empty string for empty input.</p>
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "%uXXXX" sequences are rewritten into hexadecimal html entities, they are decoded in the loop below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    $flags = Bootup::is_php('5.4') ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    // decode until a pass does not change the string anymore (or only once, if multi-decoding is off)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3630
3631
  /**
   * Alias of "UTF8::remove_bom()", the argument is forwarded unchanged.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of UTF8::remove_bom().</p>
   *
   * @deprecated <p>Use UTF8::remove_bom() instead.</p>
   */
  public static function removeBOM($str)
  {
    return self::remove_bom($str);
  }
3646 36
3647 5
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of "self::$bom" (BOM string => byte length) that is found
   * at the very beginning of the string is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // substr() returns false (PHP < 8) if nothing is left behind the BOM,
        // the cast keeps the documented string return type
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3664
3665
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Directly repeated occurrences of each needle are collapsed into a single one,
   * e.g.: "a  b" with the needle " " becomes "a b".
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        $item = (string)$item;

        // an empty needle can not be duplicated
        if ($item === '') {
          continue;
        }

        // escape "\" and "$", otherwise a needle like "$0" would be interpreted
        // as back-reference within the replacement
        $replacement = strtr(
            $item,
            array(
                '\\' => '\\\\',
                '$'  => '\\$',
            )
        );

        $str = preg_replace('/(?:' . preg_quote($item, '/') . ')+/', $replacement, $str);
      }
    }

    return $str;
  }
3688
3689
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form (e.g. "%00") of the characters.</p>
   * @param string $replacement [optional] <p>The replacement for every removed character sequence.</p>
   *
   * @return string <p>The string without invisible characters.</p>
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // the hex digits of a percent-encoding are case-insensitive ("%0b" === "%0B"),
      // therefore both patterns use the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is replaced anymore, because a removal can join
    // the surrounding characters into a new match
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3722 6
3723 6
  /**
   * Replace the diamond question mark (U+FFFD) and invalid UTF-8 byte sequences with the replacement.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>The replacement, an empty string removes the characters.</p>
   *
   * @return string <p>The cleaned string, an empty string for empty input.</p>
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $unknown = (string)$unknown;

    // mb_substitute_character() only accepts a code point or one of the keywords
    // "none" / "long" / "entity", but not an arbitrary replacement string. Therefore
    // invalid byte sequences are first converted into U+FFFD and then replaced together
    // with the already existing U+FFFD characters in one step.
    $save = \mb_substitute_character();
    \mb_substitute_character(0xFFFD);
    $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
    // restore the previous setting, it is global for mbstring
    \mb_substitute_character($save);

    return str_replace("\xEF\xBF\xBD", $unknown, $str);
  }
3761
3762
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>
   *                      Characters to be stripped. If omitted (or empty), the Unicode
   *                      categories "Z" (separators) and "C" (control chars) are stripped.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is a valid character to strip, so it must not be mistaken for "no characters given"
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass((string)$chars) . '+$/u', '', $str);
  }
3785 1
3786 1
  /**
   * Builds a regex fragment that matches one of the given characters.
   *
   * The result is either a character class ("[...]") or, if there are elements that can not
   * be part of a character class, a non-capturing group of alternatives ("(?:...|...)").
   * Results are cached per "$s . $class" combination.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial (already regex-safe) content of the character class.</p>
   *
   * @return string <p>The regex fragment (without delimiters).</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // element 0 collects the content of the character class, all other elements are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      // a hyphen is moved to the front of the character class
      if ('-' === $char) {
        $parts[0] = '-' . $parts[0];
        continue;
      }

      // elements with less than three bytes are escaped for the regex
      if (!isset($char[2])) {
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      // longer elements which are one single character are appended as they are
      if (1 === self::strlen($char)) {
        $parts[0] .= $char;
        continue;
      }

      // everything else becomes an alternative of its own
      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === count($parts)) ? $parts[0] : ('(?:' . implode('|', $parts) . ')');

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
3834
3835 1
  /**
   * WARNING: Echo native UTF8-Support libs, e.g. for debugging.
   *
   * Every value of "self::$support" is printed, followed by a line break.
   * The support check is triggered first, if it has not been done yet.
   */
  public static function showSupport()
  {
    $alreadyChecked = isset(self::$support['already_checked_via_portable_utf8']);
    if ($alreadyChecked === false) {
      self::checkForSupport();
    }

    foreach (self::$support as $supportValue) {
      echo $supportValue, "\n<br>";
    }
  }
3848
3849
  /**
   * Converts a UTF-8 character to a HTML numbered entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII chars unchanged.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ('' === $char) {
      return '';
    }

    if (true === $keepAsciiChars && true === self::is_ascii($char)) {
      return $char;
    }

    // only non-default encodings have to be normalized
    $charset = ('UTF-8' === $encoding) ? $encoding : self::normalize_encoding($encoding);

    return '&#' . self::ord($char, $charset) . ';';
  }
3881 2
3882 2
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string (PCRE code path only).</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ('' === $str) {
      return array();
    }

    $ret = array();

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }

    } else {

      // fallback: decode the byte sequences by hand, incomplete or invalid sequences are skipped

      $byteCount = strlen($str);
      $pos = 0;

      while ($pos < $byteCount) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // 1 byte (ASCII)
          $ret[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // 2 byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $ret[] = $lead . $str[$pos + 1];

            $pos++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // 3 byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $ret[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // 4 byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $ret[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }

        $pos++;
      }
    }

    if ($length > 1) {
      // glue the single characters back together in groups of $length
      $chunks = array();
      foreach (array_chunk($ret, $length) as $group) {
        $chunks[] = implode('', $group);
      }

      return $chunks;
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
3968
3969
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally an "iconv()" round-trip.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each detector is evaluated only once; 1 => little endian, 2 => big endian
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()": an encoding fits if the string survives a round-trip unchanged
    //

    $expected = (string)$str;
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // direct comparison instead of hashing both sides; a failed conversion (false) never matches
      if ($roundTrip === $expected) {
        return $encodingTmp;
      }
    }

    return false;
  }
4061
4062
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // compare the needle with the last strlen($needle) characters of the haystack
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
4085 1
4086 1
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    $tail = self::substr($haystack, -self::strlen($needle));

    // substr() may signal an error with false; never hand that to the comparison
    if (false === $tail) {
      return false;
    }

    return self::strcasecmp($tail, $needle) === 0;
  }
4109
4110
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search values are turned into quoted, case-insensitive UTF-8 patterns and
   * applied via "preg_replace()". Replacement values are inserted literally.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // an empty needle must never match: this pattern is unsatisfiable
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // "$n" and "\n" are back-references for preg_replace(), but plain text for
    // str_ireplace(): escape them so that the replacement is taken literally.
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    $replacedCount = 0;
    $subject = preg_replace($patterns, $replacement, $subject, -1, $replacedCount);
    $count = $replacedCount; // used as reference parameter

    return $subject;
  }
4153
4154
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // the needle is a prefix if its first case-insensitive occurrence is at position 0
    return 0 === self::stripos($haystack, $needle);
  }
4177
4178
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * Strings that are not longer than $length are returned unchanged. Otherwise the
   * string is cut at $length characters, a trailing partial word is dropped and
   * $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Max. number of characters to keep.</p>
   * @param string $strAddOn [optional] <p>Suffix that marks the string as shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the cut would happen right behind a word: drop only the space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // substr() may return false, keep working with a real string in that case
    $str = (string)self::substr($str, 0, $length);

    // remove the (possibly incomplete) last word
    $words = explode(' ', $str);
    array_pop($words);
    $new_str = implode(' ', $words);

    if ($new_str === '') {
      // there is no word boundary at all, so cut hard
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    return $new_str . $strAddOn;
  }
4218 1
4219
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if $pad_length is not a positive integer,
   * if it is smaller than the length of $str, or if $pad_string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad string can not fill anything (and would cause a division by zero below)
    if (!$ps_length) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // like the native str_pad(): the right side receives the extra character if $diff is odd
        $left = (int)floor($diff / 2);
        $right = $diff - $left;

        $pre = str_repeat($pad_string, (int)ceil($left / $ps_length));
        $pre = self::substr($pre, 0, $left);
        $post = str_repeat($pad_string, (int)ceil($right / $ps_length));
        $post = self::substr($post, 0, $right);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4273
4274
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4298
4299
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // delegate to the native function, $count is filled by reference
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4332 7
4333 7
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    // shuffle whole characters, not bytes
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
4348
4349
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicate values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4374
4375
  /**
   * Split a string into an array of grapheme clusters.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len [optional] <p>Number of grapheme clusters per array element.</p>
   *
   * @return array <p>
   *               The chunks of the string, an empty array for an empty string.<br />
   *               For $len < 1 the result of the native "str_split()" is returned.
   *               </p>
   */
  public static function str_split($str, $len = 1)
  {
    // init
    $len = (int)$len;
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    if ($len < 1) {
      // let the native function report the invalid length
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);

    // guard the offset like UTF8::split() does, so that an array is returned in any case
    $clusters = isset($matches[0]) ? $matches[0] : array();

    if ($len === 1) {
      return $clusters;
    }

    // glue $len clusters together per element
    $arrayOutput = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $arrayOutput[] = implode('', $group);
    }

    return $arrayOutput;
  }
4419
4420
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // the needle is a prefix if its first occurrence is at position 0
    return 0 === self::strpos($haystack, $needle);
  }
4443
4444
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in
   * base 2 without leading zeros, e.g. "a" (0x61) => "1100001".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The binary digits, "0" for an empty string.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    // Convert byte by byte: base_convert() on the whole hex string goes through
    // a float and silently loses precision for anything longer than a few bytes.
    $bits = '';
    $byteCount = \strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      $bits .= \sprintf('%08b', \ord($str[$i]));
    }

    // same format as before: no leading zeros, but at least one digit
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4459
4460
  /**
   * Convert a string into an array of words.
   *
   * The string is split on its words and the words themselves are kept, so the
   * result alternates between "text between words" (even keys) and "word" (odd keys).
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charlist [optional] <p>Additional chars which belong to a word.</p>
   *
   * @return array <p>An array with one empty string for empty input.</p>
   */
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    if ('' === $str) {
      return array('');
    }

    // letters plus the extra characters of the caller
    $charlist = self::rxClass($charlist, '\pL');

    $pattern = "/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u";

    return \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  }
4480
4481
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4496
4497
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words themselves (see $format).</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the words are located at the odd keys, the text between them at the even keys
    $strParts = self::str_to_words($str, $charlist);
    $len = count($strParts);

    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();

      // offsets are counted in characters, starting behind the leading non-word part
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $words[$offset] = $strParts[$i];
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

      return $words;
    }

    return ($len - 1) / 2;
  }
4540 15
4541 2
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    // compare the case-folded variants of both strings
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4559
4560 14
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4577
4578
  /**
   * Case-sensitive string comparison.
   *
   * Strings which are not byte-identical are compared in their NFD normalized form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // identical strings do not need to be normalized
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    return strcmp(
        \Normalizer::normalize($str1, \Normalizer::NFD),
        \Normalizer::normalize($str2, \Normalizer::NFD)
    );
  }
4598
4599
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters which end the segment.</p>
   * @param int    $offset   [optional] <p>Start position inside of $str.</p>
   * @param int    $length   [optional] <p>Length of the part of $str which is examined.</p>
   *
   * @return int|null <p>
   *                  The length (in characters) of the segment,<br />
   *                  or null if $charList or the examined string is empty.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ('' === $charList) {
      return null;
    }

    // only cut the string if the caller asked for a part of it
    if ($offset || 2147483647 !== $length) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ('' === $str) {
      return null;
    }

    // lazily capture everything in front of the first character of the mask
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $match)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($match[1]);
    }

    return self::strlen($str);
  }
4631
4632
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4649
4650
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    // convert every code point via UTF8::chr() and glue the characters together
    $chrCallback = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );
    $characters = array_map($chrCallback, $array);

    return implode('', $characters);
  }
4672
4673
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // the BOM table is keyed by the BOM byte sequence
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4690 1
4691
  /**
   * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>
   *                                The input string.
   *                                </p>
   * @param string  $allowable_tags [optional] <p>
   *                                You can use the optional second parameter to specify tags which should
   *                                not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    // clean first, so that the native function works on valid UTF-8
    $input = $cleanUtf8 ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4719
4720
  /**
   * Find the position of the first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string   $haystack  <p>The string in which the first occurrence of needle is searched.</p>
   * @param string   $needle    <p>The string to find in haystack.</p>
   * @param int|null $offset    [optional] <p>
   *                            The position in haystack to start searching;
   *                            <strong>null</strong> (the default) is treated as 0.
   *                            </p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found or one of both strings is empty.
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // The default of $offset is null (kept for backward compatibility), but
    // "grapheme_stripos()" and "mb_stripos()" expect an integer offset.
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding == 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$support['intl'] === true
        &&
        Bootup::is_php('5.4')
    ) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4787
4788
  /**
   * Return the part of haystack starting at (and including) the first case-insensitive occurrence of needle.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence
   *                               of the needle (excluding the needle) is returned instead.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found
   *                      or one of both strings is empty.
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to search in or nothing to search for
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding);
    }

    if (true === $cleanUtf8) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only "mbstring" is called with the requested encoding
    if ('UTF-8' !== $encoding && false === self::$support['mbstring']) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (true === self::$support['mbstring']) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (true === self::$support['intl']) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first match
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/usi';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $beforeNeedle = $match[1];

    return $before_needle ? $beforeNeedle : self::substr($haystack, self::strlen($beforeNeedle));
  }
4854 2
4855 1
  /**
   * Get the string length in characters, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // for these encodings the byte-length is returned directly
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$support['iconv'] === true) {
      $returnTmp = \iconv_strlen($str, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strlen($str);
      // "grapheme_strlen()" can signal a failure with null or with false,
      // neither of them must be returned as a length
      if ($returnTmp !== null && $returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $parts);
    $returnTmp = count($parts[0]);
    if ($returnTmp !== 0) {
      return $returnTmp;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str);
  }
4935
4936 19
  /**
   * Compare two strings case-insensitively using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both sides, then delegate to the case-sensitive variant
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4952 3
4953
  /**
   * Compare two strings using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // identical strings are equal, no folding needed
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4971 2
4972
  /**
   * Compare the first n characters of two strings, case-insensitively.
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both sides, then delegate to the case-sensitive variant
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4989
4990
  /**
   * String comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // UTF8::substr() can also return false, but UTF8::strcmp() expects
    // strings: cast the results so that a failed substr is compared as ''.
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5010 1
5011 1
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>The characters to look for. This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the first character found, or false if none is found
   *                      or one of both strings is empty.
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    // character class built from the list by UTF8::rxClass()
    $pattern = '/' . self::rxClass($char_list) . '/us';

    if (!preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // locate the matched character byte-wise and cut the haystack at that byte position
    $bytePosition = strpos($haystack, $m[0]);

    return substr($haystack, $bytePosition);
  }
5036 1
5037
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string being checked.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or one of both strings is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle: convert a code point
    // into its character *before* $needle is cast to string, otherwise the
    // integer check can never succeed.
    if (is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$support['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // a negative offset counts from the end of the string: translate it into
      // an absolute start, so that the result stays relative to the whole haystack
      $offset += self::strlen($haystack);
      if ($offset < 0) {
        $offset = 0;
      }
    }

    // UTF8::substr() can return false, the byte-wise search below needs a string
    $haystackTail = (string)self::substr($haystack, $offset);

    $pos = strpos($haystackTail, $needle);
    if ($pos === false) {
      return false;
    }

    // $pos is a byte position: count the characters in front of the match instead
    return $offset + self::strlen(substr($haystackTail, 0, $pos));
  }
5142
5143
  /**
   * Find the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use; anything other than "UTF-8"
   *                              is passed through UTF8::normalize_encoding() first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding);
    }

    if (true === $cleanUtf8) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // "mb_"-function, provided via polyfill if the extension is missing
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5182 1
5183 1
  /**
   * Reverse the order of the characters in a string.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence,
   *                or an empty string for empty input
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // split via UTF8::split(), flip the pieces and join them again
    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
5200
5201
  /**
   * Find the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end.
   *                               </p>
   * @param string  $encoding      [optional] <p>
   *                               Character encoding name to use; anything other than "UTF-8"
   *                               is passed through UTF8::normalize_encoding() first.
   *                               </p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ('UTF-8' !== $encoding) {
      $encoding = self::normalize_encoding($encoding);
    }

    if (true === $cleanUtf8) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5239
5240
  /**
   * Find the position of the last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br />A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found (or one of both strings is empty), it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // an integer needle has to be converted before the string cast below
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    if (true === $cleanUtf8 || true === $encoding) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ('UTF-8' === $encoding || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ('UTF-8' !== $encoding && false === self::$support['mbstring']) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (true === self::$support['mbstring']) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (true === self::$support['intl']) {
      $position = \grapheme_strripos($haystack, $needle, $offset);
      if (false !== $position) {
        return $position;
      }
    }

    // fallback via vanilla php: upper-case both sides and search case-sensitively
    $upperHaystack = self::strtoupper($haystack, $encoding);
    $upperNeedle = self::strtoupper($needle, $encoding);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5317
5318
  /**
   * Locate the last occurrence of a needle inside a haystack, counted in characters.
   *
   * <p>The search is tried with "\mb_strrpos()" (if mbstring is available), then with "\grapheme_strrpos()"
   * (if intl is available). Whenever one of them reports "false" the next strategy is tried, ending with a
   * plain PHP implementation.</p>
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string that is searched.</p>
   * @param string|int $needle    <p>The string to look for.<br />A non-negative integer is treated as a code point
   *                              and converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>A positive value starts the search that many characters into the
   *                              string. A negative value stops the search that many characters before the end of
   *                              the string.</p>
   * @param string     $encoding  [optional] <p>Charset handed to the "\mb_" function. A boolean is accepted as a
   *                              fallback for old versions and is treated as "UTF-8".</p>
   * @param boolean    $cleanUtf8 [optional] <p>Pass needle and haystack through UTF8::clean() first.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack.<br />
   *                   "false" if one of the strings is empty or the needle is not found.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // an integer needle is interpreted as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the "bool"-check on "$encoding" is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check on "$encoding" is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring (a "false" result falls through to the next strategy)
    if (self::$support['mbstring'] === true) {
      $viaMbstring = \mb_strrpos($haystack, $needle, $offset, $encoding);
      if ($viaMbstring !== false) {
        return $viaMbstring;
      }
    }

    // 2nd choice: intl (a "false" result falls through to the vanilla fallback)
    if (self::$support['intl'] === true) {
      $viaIntl = \grapheme_strrpos($haystack, $needle, $offset);
      if ($viaIntl !== false) {
        return $viaIntl;
      }
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // drop the tail of the haystack, the result is then relative to the start
      $haystack = self::substr($haystack, 0, $offset);
      $offset = 0;
    } elseif ($offset > 0) {
      // drop the head of the haystack, "$offset" is added back to the result below
      $haystack = self::substr($haystack, $offset);
    }

    $bytePosition = strrpos($haystack, $needle);

    // convert the byte position into a character position
    return $bytePosition === false
        ? false
        : $offset + self::strlen(substr($haystack, 0, $bytePosition));
  }
5412 1
5413 1
  /**
   * Measure the leading run of a string that consists only of characters from a given mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The characters that are allowed in the leading run.</p>
   * @param int    $offset [optional] <p>If non-zero, only the part selected via UTF8::substr() is examined.</p>
   * @param int    $length [optional] <p>If different from 2147483647, only the part selected via UTF8::substr()
   *                       is examined.</p>
   *
   * @return int <p>The length (via UTF8::strlen()) of the matching prefix, or 0 if nothing matches or one of the
   *             strings is empty.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $offset = (int)$offset;
    $length = (int)$length;

    // only cut the string if the caller asked for a sub-range
    if ($offset !== 0 || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    $matches = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5441 2
5442 2
  /**
   * Return the part of the haystack that starts at the first occurrence of the needle.
   *
   * <p>The search is tried with "\mb_strstr()" (if mbstring is available), then with "\grapheme_strstr()"
   * (if intl is available). Whenever one of them reports "false" the next strategy is tried, ending with a
   * regular expression based fallback.</p>
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence of the
   *                               needle is returned (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Charset handed to the "\mb_" function.</p>
   * @param boolean $cleanUtf8     [optional] <p>Pass needle and haystack through UTF8::clean() first.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if one of the strings is empty or the needle
   *                      is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring (a "false" result falls through to the next strategy)
    if (self::$support['mbstring'] === true) {
      $viaMbstring = \mb_strstr($haystack, $needle, $before_needle, $encoding);
      if ($viaMbstring !== false) {
        return $viaMbstring;
      }
    }

    // 2nd choice: intl (a "false" result falls through to the regex fallback)
    if (self::$support['intl'] === true) {
      $viaIntl = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($viaIntl !== false) {
        return $viaIntl;
      }
    }

    // fallback: capture everything in front of the first occurrence of the needle
    $match = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $prefix = $match[1];

    if ($before_needle) {
      return $prefix;
    }

    // skip the prefix, so that the result starts with the needle
    return self::substr($haystack, self::strlen($prefix));
  }
5514
5515
  /**
   * Unicode transformation for case-less matching.
   *
   * <p>The string is first rewritten with the pairs from UTF8::$commonCaseFold, optionally with the table
   * loaded via UTF8::getData('caseFolding_full'), and finally lowercased via UTF8::strtolower().</p>
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally replace the full case folding chars (default)<br />
   *                           <b>false</b>, use only the limited static array [UTF8::$commonCaseFold]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() (done after the replacements
   *                           and before lowercasing).</p>
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // the lookup tables are built once and then reused by later calls
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$commonCaseFold);
      $commonFoldReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {

      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 is used as the search list, index 1 as the replacement list
      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5566
5567
  /**
   * Convert a string to lowercase via "\mb_strtolower()".
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string  $str       <p>The string being lowercased.</p>
   * @param string  $encoding  [optional] <p>Charset handed to the "\mb_" function; anything other than "UTF-8"
   *                           is passed through UTF8::normalize_encoding() first.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() first.</p>
   *
   * @return string <p>The lowercased string, or an empty string for empty input.</p>
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is handed to mbstring
      $str = self::clean($str);
    }

    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtolower($str, $charset);
  }
5599
5600 16
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * <p>The string is normalized to NFD and every run of characters matching "\p{Mn}" is removed.</p>
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5612 13
5613 13
  /**
   * Convert a string to uppercase via "\mb_strtoupper()".
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string  $str       <p>The string being uppercased.</p>
   * @param string  $encoding  [optional] <p>Charset handed to the "\mb_" function; anything other than "UTF-8"
   *                           is passed through UTF8::normalize_encoding() first.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() first.</p>
   *
   * @return string <p>The uppercased string, or an empty string for empty input.</p>
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is handed to mbstring
      $str = self::clean($str);
    }

    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtoupper($str, $charset);
  }
5644 3
5645 3
  /**
   * Translate characters or replace sub-strings.
   *
   * <p>With three arguments, "$from" and "$to" are turned into lists (strings are split via UTF8::str_split(),
   * arrays are used as they are), the longer list is cut down to the size of the shorter one, and each
   * entry of "$from" is replaced by the entry of "$to" at the same position.</p>
   *
   * <p>With two arguments, "$from" is handed to PHP's strtr() unchanged and therefore has to be an array of
   * "search => replacement" pairs.</p>
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The characters / sub-strings to replace, or an array of replace pairs if
   *                              "$to" is omitted.</p>
   * @param string|string[] $to   [optional] <p>The replacements for the entries of "$from".</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    // INF is the "no third argument given" marker
    if (INF !== $to) {
      // UTF8::str_split() only accepts strings, so arrays must not be passed through it
      if (!is_array($from)) {
        $from = self::str_split($from);
      }

      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // array_combine() needs two lists of the same size: cut the longer one
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    return strtr($str, $from);
  }
5678
5679
  /**
   * Return the width of a string, as reported by "\mb_strwidth()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8; anything else is passed through
   *                           UTF8::normalize_encoding() first.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() first.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($str, $charset);
  }
5703
5704
  /**
   * Get part of a string, counted in characters.
   *
   * <p>The first available strategy is used: "\mb_substr()" (mbstring), "\iconv_substr()" (iconv, only for a
   * non-negative length), "\grapheme_substr()" (intl), or a plain PHP fallback based on UTF8::split().</p>
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $start     <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string. "null" means: the length
   *                           of the whole string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8. A boolean is accepted as a fallback for old
   *                           versions and is treated as "UTF-8".</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() first.</p>
   *
   * @return string|false <p>The sub-string specified by the start and length parameters.<br />
   *                      An empty string for empty input.<br />
   *                      "false" if "$start" is greater than the length of the string.</p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed to validate "$start" or to default "$length"
    $charCount = 0;
    if ($start || $length === null) {
      $charCount = (int)self::strlen($str);

      if ($start && $start > $charCount) {
        return false;
      }
    }

    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: the "bool"-check on "$encoding" is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$support['iconv'] === true) {
      return \iconv_substr($str, $start, $length);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_substr($str, $start, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($chars, $start, $length));
  }
5793
5794
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * <p>"$main_str" is cut down via UTF8::substr() to the requested window, "$str" is cut down to the length of
   * that window, and both parts are then handed to UTF8::strcasecmp() or UTF8::strcmp().</p>
   *
   * @param string  $main_str           <p>The main string being compared.</p>
   * @param string  $str                <p>The secondary string being compared.</p>
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
   *                                    the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                    the length of the str compared to the length of main_str less the offset.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>The result of UTF8::strcasecmp() or UTF8::strcmp() for the two compared parts.</p>
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // UTF8::substr() returns "false" if the offset lies behind the end of the string,
    // so both parts are cast to make sure that only strings are measured and compared
    $main_str = (string)self::substr($main_str, $offset, $length);
    $str = (string)self::substr($str, 0, self::strlen($main_str));

    if ($case_insensitivity === true) {
      return self::strcasecmp($main_str, $str);
    }

    return self::strcmp($main_str, $str);
  }
5815 1
5816 1
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack  <p>The string to search in.</p>
   * @param string   $needle    <p>The substring to search for.</p>
   * @param int      $offset    [optional] <p>The offset where to start counting.</p>
   * @param int|null $length    [optional] <p>
   *                            The maximum length after the specified offset to search for the
   *                            substring. "null" (the default) means: search up to the end of
   *                            the haystack.
   *                            </p>
   * @param string   $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The number of occurrences, or false if the haystack or the needle is empty
   *                   (or, on PHP < 7.1, if "offset + length" is not greater than zero).
   *                   </p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Compare "$length" strictly against null: an explicit length of "0" must not be
    // mistaken for "no length given".
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      // Keep "null" as it is. Casting it to "0" would cut the haystack down to an
      // empty string whenever only an offset was given.
      if ($length !== null) {
        $length = (int)$length;
      }

      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // "self::substr()" can also return false, normalize that to an empty haystack
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // clean both strings before they are compared against each other
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the (non-overlapping) regex matches of the quoted needle
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
5890
5891
  /**
   * Strips a prefix ($needle) from the beginning of the string ($haystack), ignoring the case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix that should be removed.</p>
   *
   * @return string <p>
   *                The haystack without the prefix, the unchanged haystack if it does not start
   *                with the needle (or the needle is empty), or an empty string for an empty haystack.
   *                </p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle or the haystack has another prefix
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    return self::substr($haystack, self::strlen($needle));
  }
5919
5920
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack), ignoring the case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix that should be removed.</p>
   *
   * @return string <p>
   *                The haystack without the suffix, the unchanged haystack if it does not end
   *                with the needle (or the needle is empty), or an empty string for an empty haystack.
   *                </p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle or the haystack has another suffix
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    return self::substr($haystack, 0, self::strlen($haystack) - self::strlen($needle));
  }
5948
5949
  /**
   * Strips a prefix ($needle) from the beginning of the string ($haystack), case sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix that should be removed.</p>
   *
   * @return string <p>
   *                The haystack without the prefix, the unchanged haystack if it does not start
   *                with the needle (or the needle is empty), or an empty string for an empty haystack.
   *                </p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle or the haystack has another prefix
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    return self::substr($haystack, self::strlen($needle));
  }
5977
5978
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $start            <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br /><br />
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given, then it will default to strlen(
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
   *                                          length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // Nothing to replace. Without this guard the padded scalar arguments below
      // would produce one bogus result element for an empty input array.
      if ($num === 0) {
        return array();
      }

      // $replacement: one replacement per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one offset per input string, non-integer values fall back to "0"
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);

        // missing offsets start at "0" instead of being handed over as "null"
        $start = array_pad($start, $num, 0);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one length per input string
      if (!isset($length)) {
        // "null" lets every recursive call fall back to the length of its own string,
        // exactly like the single-string call does (and like the doc-block promises)
        $length = array_fill(0, $num, null);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call: handle every string on its own
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // single string: only the first element of a replacement-array is used
    if (is_array($replacement)) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // split both strings into single (multi-byte) characters
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    // replace on character level, so that multi-byte characters are never cut in half
    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6075
6076 6
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack), case sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix that should be removed.</p>
   *
   * @return string <p>
   *                The haystack without the suffix, the unchanged haystack if it does not end
   *                with the needle (or the needle is empty), or an empty string for an empty haystack.
   *                </p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle or the haystack has another suffix
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    return self::substr($haystack, 0, self::strlen($haystack) - self::strlen($needle));
  }
6103 6
6104
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change the
   * character (it already was upper-case or has no case), it is lower-cased instead.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is processed char by char
      $str = self::clean($str);
    }

    // swaps the case of one single matched character
    $swapChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapChar, $str);
  }
6147
6148
  /**
   * Alias of "UTF8::to_ascii()", all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the "$unknown" argument.</p>
   * @param bool   $strict    <p>Passed on as the "$strict" argument.</p>
   *
   * @return string
   *
   * @deprecated <p>Use "UTF8::to_ascii()" instead.</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6165
6166 1
  /**
   * Alias of "UTF8::to_iso8859()", the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>Use "UTF8::to_iso8859()" instead.</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6181
6182 1
  /**
   * Alias of "UTF8::to_latin1()", the argument is passed through unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>Use "UTF8::to_latin1()" instead.</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6197
6198
  /**
   * Alias of "UTF8::to_utf8()", called without the optional html-entity decoding.
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>Use "UTF8::to_utf8()" instead.</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6213
6214 8
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first. Pure ASCII input is returned as it is, every other
   * character is looked up in the transliteration tables from the "data" directory
   * (one file per block of 256 code points) and replaced by "$unknown" if there is no entry.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   *
   * @throws \Exception <p>If "$strict" is true, but Intl is not available or PHP is older than 5.4.</p>
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache for the transliteration tables, filled on demand by the included data files
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
        $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      } else {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }
    }

    // split the string into single (multi-byte) characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Reset the code point for every character. Otherwise a character without a
      // usable lead byte would silently re-use the code point of its predecessor.
      $ord = null;

      // decode the code point, depending on the sequence-length announced by the lead byte
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // no valid lead byte (128-191 or 254-255), so there is nothing we can look up
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the data file ("bank"), the lowest 8 bits the entry in it
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the included file is expected to fill "$UTF8_TO_ASCII[$bank]"
          //               in this scope - confirm against the data files.
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (isset($UTF8_TO_ASCII[$bank]) && array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference, so that "$c" can not modify the last element by accident
    unset($c);

    return implode('', $chars);
  }
6342
6343
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), the keys are kept.
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] <p>The converted string or array, an empty string for empty input.</p>
   */
  public static function to_iso8859($str)
  {
    if (!is_array($str)) {
      $input = (string)$str;

      return $input === '' ? '' : self::utf8_decode($input);
    }

    $converted = array();

    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      $converted[$key] = self::to_iso8859($value);
    }

    return $converted;
  }
6372
6373
  /**
   * Alias of "UTF8::to_iso8859()", the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6386
6387
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // look ahead, "\x00" stands in for bytes behind the end of the string
        $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
        $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
        $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

        // is the following byte a UTF-8 continuation byte (0x80 - 0xBF)?
        $c2IsTail = $c2 >= "\x80" && $c2 <= "\xbf";
        $c3IsTail = $c3 >= "\x80" && $c3 <= "\xbf";
        $c4IsTail = $c4 >= "\x80" && $c4 <= "\xbf";

        if ($c1 <= "\xdf" && $c2IsTail) { // 2 bytes UTF8 - almost sure it's UTF8 already
          $buf .= $c1 . $c2;
          $i++;
        } elseif ($c1 >= "\xe0" && $c1 <= "\xef" && $c2IsTail && $c3IsTail) { // 3 bytes UTF8
          $buf .= $c1 . $c2 . $c3;
          $i += 2;
        } elseif ($c1 >= "\xf0" && $c1 <= "\xf7" && $c2IsTail && $c3IsTail && $c4IsTail) { // 4 bytes UTF8
          $buf .= $c1 . $c2 . $c3 . $c4;
          $i += 3;
        } else { // not valid UTF8 - encode the single byte as a 2 bytes UTF8 sequence
          // ">> 6" instead of "/ 64": "chr()" needs an integer, a fractional float is deprecated since PHP 8.1
          $buf .= (chr(ord($c1) >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= (chr($ordC1 >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf, ENT_QUOTES);
    }

    return $buf;
  }
6513
6514
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>
   *                      Optional characters to be stripped. If omitted (or empty), unicode
   *                      separators and control characters are stripped.
   *                      </p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is a valid char list, even if it is "falsy" in PHP,
    // so it must not trigger the default whitespace handling.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6542
6543
  /**
   * Makes string's first char uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $firstChar = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // "self::substr()" can also return false: hand over / append strings only
    if ($firstChar === false) {
      $firstChar = '';
    }

    if ($rest === false) {
      $rest = '';
    }

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $rest;
  }
6556
6557
  /**
   * Alias of "UTF8::ucfirst()", all arguments are passed through unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $upperCased = self::ucfirst($word, $encoding, $cleanUtf8);

    return $upperCased;
  }
6572
6573
  /**
   * Uppercase for all words in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that are left untouched (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Check for a really empty string: "0" is "falsy" in PHP, but it is valid input.
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $newwords = array();

    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // skip empty parts only, a "0" has to stay in the result
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    // NOTE(review): the parts are glued without a separator, so "str_to_words()"
    //               presumably returns the separators as own parts - confirm.
    return implode('', $newwords);
  }
6622
6623
  /**
6624
   * Multi decode html entity & fix urlencoded-win1252-chars.
6625
   *
6626
   * e.g:
6627
   * 'test+test'                     => 'test test'
6628
   * 'D&#252;sseldorf'               => 'Düsseldorf'
6629
   * 'D%FCsseldorf'                  => 'Düsseldorf'
6630
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
6631
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
6632
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
6633
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
6634
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
6635
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
6636
   *
6637
   * @param string $str          <p>The input string.</p>
6638
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
6639
   *
6640
   * @return string
6641
   */
6642 View Code Duplication
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Non-standard "%uXXXX" escapes are rewritten as hexadecimal HTML
    // entities ("&#xXXXX;") so the entity decoder below can resolve them.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4.
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // One pass = to_utf8 -> html_entity_decode -> urldecode -> fix_simple_utf8.
    // With $multi_decode the pass is repeated until the string stops changing.
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
6673
6674
  /**
6675
   * Returns an array that maps "urlencoded" win1252 sequences to UTF-8.
6676
   *
6677
   * @deprecated use the "UTF8::urldecode()" function to decode a string
6678
   *
6679
   * @return array
6680
   */
6681
  public static function urldecode_fix_win1252_chars()
  {
    // Lookup table: percent-encoded Windows-1252 byte ("%20" .. "%FF") => the
    // corresponding character as UTF-8.
    //
    // - "%80" is the Euro sign in Windows-1252 (it was mapped to a backtick,
    //   which is already covered by "%60").
    // - "%7F" (DEL), "%A0" (no-break space) and "%AD" (soft hyphen) are
    //   invisible characters; they are written as explicit byte escapes so
    //   they cannot get lost in editors or copy & paste.
    // - "%81", "%8D", "%8F", "%90" and "%9D" are unassigned in Windows-1252
    //   and therefore map to an empty string.
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F",
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xC2\xA0",
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD",
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6912
6913
  /**
6914
   * Decodes an UTF-8 string to ISO-8859-1.
6915
   *
6916
   * @param string $str <p>The input string.</p>
6917
   *
6918
   * @return string
6919
   */
6920
  public static function utf8_decode($str)
  {
    // Search/replace lists derived once from self::$utf8ToWin1252 and then
    // reused on every later call.
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $str = (string)self::to_utf8($str);

    if (null === $searchCache) {
      $searchCache = array_keys(self::$utf8ToWin1252);
      $replaceCache = array_values(self::$utf8ToWin1252);
    }

    // Apply the table first, then hand the result to the Xml helper for the
    // actual decoding step.
    $mapped = str_replace($searchCache, $replaceCache, $str);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
6942
6943
  /**
6944
   * Encodes an ISO-8859-1 string to UTF-8.
6945
   *
6946
   * @param string $str <p>The input string.</p>
6947
   *
6948
   * @return string
6949
   */
6950
  public static function utf8_encode($str)
  {
    // Search/replace lists derived once from self::$cp1252ToUtf8 and then
    // reused on every later call.
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // Without a 0xC2 byte the table is not applied at all; presumably every
    // key of self::$cp1252ToUtf8 starts with that byte -- verify against the
    // table definition.
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if (null === $searchCache) {
      $searchCache = array_keys(self::$cp1252ToUtf8);
      $replaceCache = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($searchCache, $replaceCache, $encoded);
  }
6976
6977
  /**
6978
   * fix -> utf8-win1252 chars
6979
   *
6980
   * @param string $str <p>The input string.</p>
6981
   *
6982
   * @return string
6983
   *
6984
   * @deprecated use "UTF8::fix_simple_utf8()"
6985
   */
6986
  public static function utf8_fix_win1252_chars($str)
  {
    // Deprecated alias: forwards the input unchanged to fix_simple_utf8().
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6990
6991
  /**
6992
   * Returns an array with all utf8 whitespace characters.
6993
   *
6994
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
6995
   *
6996
   * @author: Derek E. [email protected]
6997
   *
6998
   * @return array <p>
6999
   *               An array with all known whitespace characters as values and the type of whitespace as keys
7000
   *               as defined in above URL.
7001
   *               </p>
7002
   */
7003
  public static function whitespace_table()
  {
    // Plain accessor for the class-level whitespace table.
    $table = self::$whitespaceTable;

    return $table;
  }
7007
7008
  /**
7009
   * Limit the number of words in a string.
7010
   *
7011
   * @param string $str      <p>The input string.</p>
7012
   * @param int    $words    <p>The limit of words as integer.</p>
7013
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
7014
   *
7015
   * @return string
7016
   */
7017
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    $limit = (int)$words;

    // Nothing to return for an empty input or a non-positive word limit.
    if ($str === '' || $limit < 1) {
      return '';
    }

    // Match optional leading whitespace followed by at most $limit runs of
    // non-whitespace, each with its trailing whitespace.
    $matches = array();
    preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

    // No match at all: hand the input back untouched.
    if (!isset($matches[0])) {
      return $str;
    }

    $head = $matches[0];

    // The match already spans the whole input, so nothing was cut off.
    if (self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
7043
7044
  /**
7045
   * Wraps a string to a given number of characters
7046
   *
7047
   * @link  http://php.net/manual/en/function.wordwrap.php
7048
   *
7049
   * @param string $str   <p>The input string.</p>
7050
   * @param int    $width [optional] <p>The column width.</p>
7051
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
7052
   * @param bool   $cut   [optional] <p>
7053
   *                      If the cut is set to true, the string is
7054
   *                      always wrapped at or before the specified width. So if you have
7055
   *                      a word that is larger than the given width, it is broken apart.
7056
   *                      </p>
7057
   *
7058
   * @return string <p>The given string wrapped at the specified column.</p>
7059
   */
7060
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ($str === '' || $break === '') {
      return '';
    }

    // Step 1: build a single-byte "mask" of the input so the native
    // wordwrap() can do the layout: ' ' for a space, '?' for any other
    // character and '#' for an already existing break. $chars keeps the
    // real characters (and breaks) in the same order.
    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $segmentIndex => $segment) {

      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    // Step 2: let the native function wrap the mask, using '#' as break.
    $wrappedMask = wordwrap($mask, $width, '#', $cut);

    // Step 3: walk over every '#' in the wrapped mask and copy the real
    // characters in front of it to the result, followed by the real break.
    $strReturn = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== ($breakPos = self::strpos($wrappedMask, '#', $breakPos + 1))) {

      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $strReturn .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // A '#' that stands for an original break or for a space consumes that
      // character; a '#' inserted in the middle of a word (cut mode) consumes
      // nothing.
      $current = $chars[$charIndex];
      if ($current === $break || $current === ' ') {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $strReturn .= $break;
    }

    // Whatever is left belongs to the last line.
    return $strReturn . implode('', $chars);
  }
7111
7112
  /**
7113
   * Returns an array of Unicode White Space characters.
7114
   *
7115
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
7116
   */
7117
  public static function ws()
  {
    // Plain accessor for the class-level whitespace list.
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7121
7122
}
7123