Completed
Push — master ( e53110...fb7a32 )
by Lars
06:00
created

UTF8   D

Complexity

Total Complexity 837

Size/Duplication

Total Lines 6916
Duplicated Lines 10.64 %

Coupling/Cohesion

Components 2
Dependencies 3

Test Coverage

Coverage 85.33%

Importance

Changes 0
Metric Value
wmc 837
lcom 2
cbo 3
dl 736
loc 6916
ccs 1570
cts 1840
cp 0.8533
rs 4.4102
c 0
b 0
f 0

165 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A access() 0 4 1
A add_bom_to_string() 0 8 2
A binary_to_str() 0 4 1
A bom() 0 4 1
A callback() 0 4 1
A checkForSupport() 0 22 2
C chr() 0 49 10
A chr_map() 0 6 1
A chr_size_list() 0 8 2
B chr_to_decimal() 0 32 6
A chr_to_hex() 0 4 1
A chunk_split() 0 4 1
B clean() 0 35 4
A cleanup() 0 20 2
B codepoints() 0 26 3
A count_chars() 0 4 1
A decimal_to_chr() 0 8 1
C encode() 0 63 16
B file_get_contents() 0 35 6
A file_has_bom() 0 4 1
C filter() 11 52 13
A filter_input() 10 10 2
A filter_input_array() 10 10 2
A filter_var() 10 10 2
A filter_var_array() 10 10 2
A fits_inside() 0 4 1
A fix_simple_utf8() 19 19 3
B fix_utf8() 0 22 4
D getCharDirection() 0 112 119
A getData() 0 10 2
A hasBom() 0 4 1
A hex_to_int() 0 12 3
A html_decode() 0 4 1
B html_encode() 0 38 5
C html_entity_decode() 0 65 12
B htmlentities() 0 28 6
A htmlspecialchars() 0 8 2
A iconv_loaded() 0 14 3
A int_to_hex() 0 12 3
A intlChar_loaded() 0 4 2
A intl_loaded() 0 4 2
A isAscii() 0 4 1
A isBase64() 0 4 1
A isBinary() 0 4 1
A isBom() 0 4 1
A isHtml() 0 4 1
A isJson() 0 4 1
A isUtf16() 0 4 1
A isUtf32() 0 4 1
A isUtf8() 0 4 1
A is_ascii() 0 10 2
A is_base64() 0 14 3
B is_binary() 0 17 5
A is_binary_file() 0 12 2
A is_bom() 0 10 3
A is_html() 0 19 3
A is_json() 0 18 4
C is_utf16() 48 48 12
C is_utf32() 48 48 12
D is_utf8() 21 134 25
A json_decode() 0 12 2
A json_encode() 0 12 2
A lcfirst() 0 4 1
A ltrim() 15 15 4
A max() 8 8 2
A max_chr_width() 0 9 2
A mbstring_loaded() 0 10 3
A min() 8 8 2
A normalizeEncoding() 0 4 1
B normalize_encoding() 0 49 6
A normalize_msword() 19 19 3
B normalize_whitespace() 0 36 6
B number_format() 0 25 3
C ord() 0 47 15
A parse_str() 0 12 3
A pcre_utf8_support() 0 5 1
D range() 14 38 9
A removeBOM() 0 4 1
A remove_bom() 0 10 3
A remove_duplicates() 0 15 4
A remove_invisible_characters() 0 20 3
A replace_diamond_question_mark() 0 14 1
A rtrim() 15 15 4
C rxClass() 0 40 8
A showSupport() 0 6 2
B single_chr_html_encode() 0 23 5
C split() 12 75 23
C str_detect_encoding() 0 82 11
A str_ends_with() 15 15 3
A str_iends_with() 15 15 3
A str_ireplace() 0 18 3
A str_istarts_with() 15 15 3
B str_limit_after_word() 0 31 5
C str_pad() 9 41 7
A str_repeat() 0 6 1
A str_replace() 0 4 1
A str_shuffle() 0 8 1
A str_sort() 0 16 3
B str_split() 0 36 6
A str_starts_with() 15 15 3
A str_to_binary() 0 8 1
A str_transliterate() 0 4 1
B str_word_count() 0 31 5
A strcasecmp() 0 4 1
A strchr() 0 4 1
A strcmp() 0 8 2
B strcspn() 0 22 6
A strichr() 0 4 1
A string() 0 13 1
A string_has_bom() 0 10 3
A strip_tags() 0 7 1
D stripos() 9 39 9
D stristr() 7 41 9
C strlen() 9 59 15
A strnatcasecmp() 0 4 1
A strnatcmp() 0 4 2
A strncasecmp() 0 4 1
A strncmp() 0 7 1
A strpbrk() 0 15 3
D strpos() 22 75 17
A strrchr() 15 15 3
A strrev() 0 10 2
A strrichr() 15 15 3
C strripos() 32 57 14
D strrpos() 32 71 17
B strspn() 0 17 5
C strstr() 7 48 10
B strtocasefold() 0 37 6
A strtolower() 21 21 4
A strtonatfold() 0 5 1
A strtoupper() 20 20 4
A strtr() 0 19 4
A strwidth() 0 15 3
C substr() 9 69 16
A substr_compare() 0 7 2
C substr_count() 7 47 10
A substr_ileft() 19 19 4
A substr_iright() 19 19 4
A substr_left() 19 19 4
C substr_replace() 20 63 14
A substr_right() 19 19 4
B swapCase() 0 34 5
A toAscii() 0 4 1
A toIso8859() 0 4 1
A toLatin1() 0 4 1
A toUTF8() 0 4 1
F to_ascii() 6 116 27
B to_iso8859() 0 22 4
A to_latin1() 0 4 1
D to_utf8() 20 106 26
A trim() 0 15 4
A ucfirst() 0 4 1
A ucword() 0 4 1
C ucwords() 0 39 8
B urldecode() 31 31 6
B rawurldecode() 31 31 6
B urldecode_fix_win1252_chars() 0 231 1
A utf8_decode() 0 22 3
B utf8_encode() 0 26 4
A utf8_fix_win1252_chars() 0 4 1
A whitespace_table() 0 4 1
B words_limit() 0 26 5
C wordwrap() 0 51 10
A ws() 0 4 1

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

Complex Class

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This can often reduce the size of classes significantly.

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
93
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
94
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
96
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
98
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
99
  );
100
101
  /**
102
   * Numeric code point => UTF-8 Character
103
   *
104
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
105
   *
106
   * @var array
107
   */
108
  private static $whitespace = array(
109
    // NUL Byte
110
    0     => "\x0",
111
    // Tab
112
    9     => "\x9",
113
    // New Line
114
    10    => "\xa",
115
    // Vertical Tab
116
    11    => "\xb",
117
    // Carriage Return
118
    13    => "\xd",
119
    // Ordinary Space
120
    32    => "\x20",
121
    // NO-BREAK SPACE
122
    160   => "\xc2\xa0",
123
    // OGHAM SPACE MARK
124
    5760  => "\xe1\x9a\x80",
125
    // MONGOLIAN VOWEL SEPARATOR
126
    6158  => "\xe1\xa0\x8e",
127
    // EN QUAD
128
    8192  => "\xe2\x80\x80",
129
    // EM QUAD
130
    8193  => "\xe2\x80\x81",
131
    // EN SPACE
132
    8194  => "\xe2\x80\x82",
133
    // EM SPACE
134
    8195  => "\xe2\x80\x83",
135
    // THREE-PER-EM SPACE
136
    8196  => "\xe2\x80\x84",
137
    // FOUR-PER-EM SPACE
138
    8197  => "\xe2\x80\x85",
139
    // SIX-PER-EM SPACE
140
    8198  => "\xe2\x80\x86",
141
    // FIGURE SPACE
142
    8199  => "\xe2\x80\x87",
143
    // PUNCTUATION SPACE
144
    8200  => "\xe2\x80\x88",
145
    // THIN SPACE
146
    8201  => "\xe2\x80\x89",
147
    //HAIR SPACE
148
    8202  => "\xe2\x80\x8a",
149
    // LINE SEPARATOR
150
    8232  => "\xe2\x80\xa8",
151
    // PARAGRAPH SEPARATOR
152
    8233  => "\xe2\x80\xa9",
153
    // NARROW NO-BREAK SPACE
154
    8239  => "\xe2\x80\xaf",
155
    // MEDIUM MATHEMATICAL SPACE
156
    8287  => "\xe2\x81\x9f",
157
    // IDEOGRAPHIC SPACE
158
    12288 => "\xe3\x80\x80",
159
  );
160
161
  /**
162
   * @var array
163
   */
164
  private static $whitespaceTable = array(
165
      'SPACE'                     => "\x20",
166
      'NO-BREAK SPACE'            => "\xc2\xa0",
167
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
168
      'EN QUAD'                   => "\xe2\x80\x80",
169
      'EM QUAD'                   => "\xe2\x80\x81",
170
      'EN SPACE'                  => "\xe2\x80\x82",
171
      'EM SPACE'                  => "\xe2\x80\x83",
172
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
173
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
174
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
175
      'FIGURE SPACE'              => "\xe2\x80\x87",
176
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
177
      'THIN SPACE'                => "\xe2\x80\x89",
178
      'HAIR SPACE'                => "\xe2\x80\x8a",
179
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
180
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
181
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
182
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
183
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
184
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
185
  );
186
187
  /**
188
   * bidirectional text chars
189
   *
190
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
191
   *
192
   * @var array
193
   */
194
  private static $bidiUniCodeControlsTable = array(
195
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
196
    8234 => "\xE2\x80\xAA",
197
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
198
    8235 => "\xE2\x80\xAB",
199
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
200
    8236 => "\xE2\x80\xAC",
201
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
202
    8237 => "\xE2\x80\xAD",
203
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
204
    8238 => "\xE2\x80\xAE",
205
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
206
    8294 => "\xE2\x81\xA6",
207
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
208
    8295 => "\xE2\x81\xA7",
209
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
210
    8296 => "\xE2\x81\xA8",
211
    // POP DIRECTIONAL ISOLATE
212
    8297 => "\xE2\x81\xA9",
213
  );
214
215
  /**
216
   * @var array
217
   */
218
  private static $commonCaseFold = array(
219
      'ſ'            => 's',
220
      "\xCD\x85"     => 'ι',
221
      'ς'            => 'σ',
222
      "\xCF\x90"     => 'β',
223
      "\xCF\x91"     => 'θ',
224
      "\xCF\x95"     => 'φ',
225
      "\xCF\x96"     => 'π',
226
      "\xCF\xB0"     => 'κ',
227
      "\xCF\xB1"     => 'ρ',
228
      "\xCF\xB5"     => 'ε',
229
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
230
      "\xE1\xBE\xBE" => 'ι',
231
  );
232
233
  /**
234
   * @var array
235
   */
236
  private static $brokenUtf8ToUtf8 = array(
237
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
238
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
239
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
240
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
241
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
242
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
243
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
244
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
245
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
246
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
247
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
248
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
249
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
250
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
251
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
252
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
253
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
254
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
255
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
256
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
257
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
258
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
259
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
260
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
261
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
262
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
263
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
264
      'ü'       => 'ü',
265
      'ä'       => 'ä',
266
      'ö'       => 'ö',
267
      'Ö'       => 'Ö',
268
      'ß'       => 'ß',
269
      'Ã '       => 'à',
270
      'á'       => 'á',
271
      'â'       => 'â',
272
      'ã'       => 'ã',
273
      'ù'       => 'ù',
274
      'ú'       => 'ú',
275
      'û'       => 'û',
276
      'Ù'       => 'Ù',
277
      'Ú'       => 'Ú',
278
      'Û'       => 'Û',
279
      'Ü'       => 'Ü',
280
      'ò'       => 'ò',
281
      'ó'       => 'ó',
282
      'ô'       => 'ô',
283
      'è'       => 'è',
284
      'é'       => 'é',
285
      'ê'       => 'ê',
286
      'ë'       => 'ë',
287
      'À'       => 'À',
288
      'Á'       => 'Á',
289
      'Â'       => 'Â',
290
      'Ã'       => 'Ã',
291
      'Ä'       => 'Ä',
292
      'Ã…'       => 'Å',
293
      'Ç'       => 'Ç',
294
      'È'       => 'È',
295
      'É'       => 'É',
296
      'Ê'       => 'Ê',
297
      'Ë'       => 'Ë',
298
      'ÃŒ'       => 'Ì',
299
      'Í'       => 'Í',
300
      'ÃŽ'       => 'Î',
301
      'Ï'       => 'Ï',
302
      'Ñ'       => 'Ñ',
303
      'Ã’'       => 'Ò',
304
      'Ó'       => 'Ó',
305
      'Ô'       => 'Ô',
306
      'Õ'       => 'Õ',
307
      'Ø'       => 'Ø',
308
      'Ã¥'       => 'å',
309
      'æ'       => 'æ',
310
      'ç'       => 'ç',
311
      'ì'       => 'ì',
312
      'í'       => 'í',
313
      'î'       => 'î',
314
      'ï'       => 'ï',
315
      'ð'       => 'ð',
316
      'ñ'       => 'ñ',
317
      'õ'       => 'õ',
318
      'ø'       => 'ø',
319
      'ý'       => 'ý',
320
      'ÿ'       => 'ÿ',
321
      '€'      => '€',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $utf8ToWin1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $utf8MSWord = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $iconvEncoding = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $support = array();
808
809 1
  /**
   * Creates the helper instance and triggers the environment check.
   *
   * The constructor only delegates to checkForSupport(); it takes no
   * arguments and keeps no state of its own.
   */
  public function __construct()
  {
    // delegate to the static support check
    self::checkForSupport();
  }
816
817
  /**
   * Fetch one character from a string by position ($str[1]-like access).
   *
   * This is a thin wrapper that asks self::substr() for a slice of length 1
   * starting at the given position.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character.</p>
   */
  public static function access($str, $pos)
  {
    // a slice of exactly one character, starting at $pos
    $singleChar = self::substr($str, $pos, 1);

    return $singleChar;
  }
829
830
  /**
   * Make sure the given string starts with the UTF-8 byte order mark.
   *
   * INFO: The input is returned untouched unless string_has_bom() reports
   *       exactly false for it.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The input string, prefixed with the BOM where it was missing.</p>
   */
  public static function add_bom_to_string($str)
  {
    // guard clause: only a strict "false" from the BOM check triggers the prepend
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
847
848
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it represents.
   *
   * The input is read as one big-endian binary number: insignificant leading
   * zeros are dropped and the remaining digits are left-padded to whole bytes.
   * The conversion is done byte by byte, so inputs of any length are handled
   * without losing precision and partial leading bytes are aligned correctly.
   *
   * INFO: An empty input is treated like the value zero and yields "\0".
   *
   * @param mixed $bin <p>The binary digits, e.g. "1100001".</p>
   *
   * @return string <p>The decoded bytes, e.g. "a".</p>
   */
  public static function binary_to_str($bin)
  {
    // keep binary digits only, everything else carries no value
    $bits = preg_replace('/[^01]/', '', (string)$bin);

    // drop insignificant leading zeros, but keep one digit for the value zero
    $bits = ltrim((string)$bits, '0');
    if ($bits === '') {
      $bits = '0';
    }

    // left-pad to a multiple of 8, so that every chunk below is one full byte
    $missing = (8 - strlen($bits) % 8) % 8;
    $bits = str_repeat('0', $missing) . $bits;

    $str = '';
    foreach (str_split($bits, 8) as $byte) {
      $str .= chr(bindec($byte));
    }

    return $str;
  }
859
860 2
  /**
   * Provides the UTF-8 Byte Order Mark.
   *
   * @return string <p>The three BOM bytes (0xEF 0xBB 0xBF).</p>
   */
  public static function bom()
  {
    $bom = "\xEF\xBB\xBF";

    return $bom;
  }
869
870
  /**
   * Alias of UTF8::chr_map(): the arguments are passed through unchanged.
   *
   * @see UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>The string to run the callback on.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
883
884 2
  /**
   * Detects which UTF-8 related features the current environment offers
   * and stores the results in the static support-cache.
   *
   * INFO: There is no need to call it manually, it is triggered when needed
   *       and does nothing if the detection has already been done.
   */
  public static function checkForSupport()
  {
    // the detection has to run only once
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$support['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$support['mbstring'] = self::mbstring_loaded();

    // http://php.net/manual/en/book.iconv.php
    self::$support['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$support['intl'] = self::intl_loaded();

    // http://php.net/manual/en/class.intlchar.php
    self::$support['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
911 1
912
  /**
   * Builds the character that belongs to the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For UTF-8 output "IntlChar" is used when the support-check reports it as available,
   * otherwise the bytes are assembled by hand and remembered in a static cache.
   *
   * @param int    $code_point <p>The code point, must be a real integer.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>The (multi-byte) character, null if $code_point is not an integer.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // only real integers are accepted (no numeric strings, no floats)
    if ((int)$code_point !== $code_point) {
      return null;
    }

    if (isset(self::$support['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    } elseif (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // static cache for the manual conversion below
    static $memo = array();
    $memoKey = $code_point . $encoding;
    if (isset($memo[$memoKey]) === true) {
      return $memo[$memoKey];
    }

    // wrap the value into the range that fits into four UTF-8 bytes
    $code_point %= 0x200000;

    if ($code_point < 0x80) {
      // 0xxxxxxx
      $char = chr($code_point);
    } elseif ($code_point < 0x800) {
      // 110xxxxx 10xxxxxx
      $char = chr(0xC0 | ($code_point >> 6))
              . chr(0x80 | ($code_point & 0x3F));
    } elseif ($code_point < 0x10000) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      $char = chr(0xE0 | ($code_point >> 12))
              . chr(0x80 | (($code_point >> 6) & 0x3F))
              . chr(0x80 | ($code_point & 0x3F));
    } else {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $char = chr(0xF0 | ($code_point >> 18))
              . chr(0x80 | (($code_point >> 12) & 0x3F))
              . chr(0x80 | (($code_point >> 6) & 0x3F))
              . chr(0x80 | ($code_point & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $char = \mb_convert_encoding($char, $encoding, 'UTF-8');
    }

    // remember the result for the next call
    $memo[$memoKey] = $char;

    return $char;
  }
971
972
  /**
   * Runs a callback on every character of a string.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string, it is split via UTF8::split().</p>
   *
   * @return array <p>The callback results, one entry per character.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
986
987
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character, an empty array for empty input.</p>
   */
  public static function chr_size_list($str)
  {
    // "0" is falsy in PHP, but it is a valid string with one character,
    // so it must not take the "empty input" shortcut
    if (!$str && $str !== '0') {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1007
1008
  /**
   * Calculates the decimal code of a character from its UTF-8 bytes.
   *
   * The first byte decides how many bytes belong to the character (1 - 4),
   * every following byte contributes its lower bits.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;
    $code = self::ord($char[0]);

    // 0xxxxxxx => the first byte already is the result
    if (($code & 0x80) == 0) {
      return $code;
    }

    // detect the sequence length and strip the length-marker from the first byte
    $length = 1;
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $length = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $length = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $length = 4;
      $code &= ~0xf0;
    }

    // 10xxxxxx => append the payload of every continuation byte
    for ($pos = 1; $pos < $length; ++$pos) {
      $code = ($code << 6) + (self::ord($char[$pos]) & ~0x80);
    }

    return $code;
  }
1047
1048
  /**
   * Returns the hexadecimal code point of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>The prefix, it is passed on to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point in the prefixed hex notation, e.g. U+xxxx</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
1060
1061
  /**
   * Cuts a string into chunks and joins them with the given line ending.
   *
   * INFO: The line ending is only placed between the chunks.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1074
1075
  /**
   * Sanitizes a string: byte sequences that do not form UTF-8 characters are dropped,
   * further normalization steps can be switched on via the optional parameters.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // INFO: the repetition is limited, an unlimited one caused connection reset problems on larger strings
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // only the first group (the valid sequences) survives the replacement
    $cleaned = preg_replace($pattern, '$1', $str);

    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    // optional steps, the order is: whitespace, MS Word chars, BOM
    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::removeBOM($cleaned);
    }

    return $cleaned;
  }
1123 4
1124
  /**
   * Cleans up a string: fixes simple UTF-8 encoding errors first
   * and runs UTF8::clean() with a fixed set of options afterwards.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $str = self::fix_simple_utf8($str);

    // options for the clean-up:
    // - remove the BOM
    // - normalize whitespace chars ...
    // - ... but keep non-breaking-spaces
    // - do not touch MS Word chars
    $removeBom = true;
    $normalizeWhitespace = true;
    $normalizeMsWord = false;
    $keepNonBreakingSpace = true;

    // the call also removes non UTF-8 symbols, the diamond question mark (�)
    // and invisible characters (e.g. "\0")
    $str = self::clean($str, $removeBom, $normalizeWhitespace, $normalizeMsWord, $keepNonBreakingSpace);

    return (string)$str;
  }
1151
1152 5
  /**
   * Converts a string (or an array of characters) into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $class = '\\voku\\helper\\UTF8';

    // a string is split into its characters first, everything else is used as it is
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $codePoints = array_map(array($class, 'ord'), $chars);

    if ($u_style) {
      $codePoints = array_map(array($class, 'int_to_hex'), $codePoints);
    }

    return $codePoints;
  }
1189
1190
  /**
   * Counts how often every character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split into chunks of one character each
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1203
1204
  /**
   * Creates the UTF-8 character for a decimal code.
   *
   * The code is written as a hexadecimal HTML entity, which is then decoded via "mbstring".
   *
   * @param int $code
   *
   * @return string
   */
  public static function decimal_to_chr($code)
  {
    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
1219 11
1220
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The input is returned unchanged if it (or the target encoding) is empty, if the current
   * encoding can not be detected, if (without $force) the string already has the target encoding
   * or if the conversion fails.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $encodingDetected = self::str_detect_encoding($str);

    // without a detected encoding there is nothing we can convert from
    if (!is_string($encodingDetected) || $encodingDetected === '') {
      return $str;
    }

    // without "$force" a string that already has the target encoding is left alone
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || $encodingDetected === 'UTF-8'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || $encodingDetected === 'ISO-8859-1'
            || $encodingDetected === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // compare explicitly: a converted "0" is a valid result, but it is falsy
    if ($strEncoded !== false && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
1296
1297
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>Name of the file to read, it is passed through
   *                                     "FILTER_SANITIZE_STRING" before it is used.</p>
   * @param int|null      $flags         [optional] <p>Passed on as second argument of the native function,
   *                                     e.g. FILE_USE_INCLUDE_PATH to search for the file in the include path.</p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with
   *                                     stream_context_create. If no context is given and $timeout is
   *                                     not 0, a context with this "http"-timeout is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts.
   *                                     </p>
   * @param int|null      $maxlen        [optional] <p>
   *                                     Maximum length of data read. The default is to read until end
   *                                     of file is reached.
   *                                     </p>
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
   *
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                     or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;

    // NOTE(review): "FILTER_SANITIZE_STRING" is deprecated as of PHP 8.1 and it also rewrites
    // quotes in the filename - kept for backward compatibility, confirm if it is still wanted
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // the filter failed, so there is no usable filename to read from
    if ($filename === false) {
      return false;
    }

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // the native function expects bool / int here and passing null to these
    // parameters is deprecated as of PHP 8.1, so convert the "null"-defaults explicitly
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1414
1415
  /**
   * Tells whether the content of a file starts with a BOM (Byte Order Mark).
   *
   * INFO: The whole file is read, the check itself is done by UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    return self::string_has_bom($content);
  }
1426 3
1427 3
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized and all other types are returned as they are.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form that is passed to "\Normalizer".</p>
   * @param string $leading_combining  <p>Is put in front of a string that starts with a combining char.</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization at all
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // marker: the string is usable as it is
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // fall back to the encoding-fix, if the normalization gave no result
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars
    // for NFC-safe concatenations.
    $startsWithCombiningChar = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);
    if ($startsWithCombiningChar) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
1488
1489
  /**
   * "filter_input()"-wrapper: the result of the native function is passed through UTF8::filter().
   *
   * Gets a specific external variable by name and optionally filters it
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              It is only handed to the native function if it was really passed.
   *                              </p>
   *
   * @return mixed <p>The return value of the native "filter_input()", passed through UTF8::filter().</p>
   * @since 5.2.0
   */
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // "$options" must not be passed on, if the caller did not provide it
    $var = func_num_args() < 4
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1529
1530
  /**
   * "filter_input_array()"-wrapper: the result of the native function is passed through UTF8::filter().
   *
   * Gets external variables and optionally filters them
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments or an integer holding a filter constant,
   *                          see the manual page of the native function for the details.
   *                          It is only handed to the native function if it was really passed.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed <p>The return value of the native "filter_input_array()", passed through UTF8::filter().</p>
   * @since 5.2.0
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with the type only, the native function is called without any definition
    $a = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($a);
  }
1577 7
1578
  /**
1579 7
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1580 7
   *
1581
   * Filters a variable with a specified filter
1582 7
   *
1583
   * @link  http://php.net/manual/en/function.filter-var.php
1584 7
   *
1585 2
   * @param mixed $variable <p>
1586
   *                        Value to filter.
1587
   *                        </p>
1588 7
   * @param int   $filter   [optional] <p>
1589 1
   *                        The ID of the filter to apply. The
1590 1
   *                        manual page lists the available filters.
1591 1
   *                        </p>
1592
   * @param mixed $options  [optional] <p>
1593 7
   *                        Associative array of options or bitwise disjunction of flags. If filter
1594
   *                        accepts options, flags can be provided in "flags" field of array. For
1595
   *                        the "callback" filter, callable type should be passed. The
1596
   *                        callback must accept one argument, the value to be filtered, and return
1597
   *                        the value after filtering/sanitizing it.
1598
   *                        </p>
1599
   *                        <p>
1600
   *                        <code>
1601
   *                        // for filters that accept options, use this format
1602
   *                        $options = array(
1603 1
   *                        'options' => array(
1604
   *                        'default' => 3, // value to return if the filter fails
1605 1
   *                        // other options here
1606
   *                        'min_range' => 0
1607 1
   *                        ),
1608
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1609
   *                        );
1610 1
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1611 1
   *                        // for filter that only accept flags, you can pass them directly
1612
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1613 1
   *                        // for filter that only accept flags, you can also pass as an array
1614
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1615
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1616 1
   *                        // callback validate filter
1617 1
   *                        function foo($value)
1618 1
   *                        {
1619 1
   *                        // Expected format: Surname, GivenNames
1620 1
   *                        if (strpos($value, ", ") === false) return false;
1621
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1622 1
   *                        $empty = (empty($surname) || empty($givennames));
1623
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1624
   *                        if ($empty || $notstrings) {
1625
   *                        return false;
1626
   *                        } else {
1627
   *                        return $value;
1628
   *                        }
1629
   *                        }
1630
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1631
   *                        </code>
1632 1
   *                        </p>
1633
   *
1634 1
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1635
   * @since 5.2.0
1636
   */
1637 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Pass $options on to the native function only if the caller supplied it.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // hand the raw result to UTF8::filter() before returning it
    return self::filter($filtered);
  }
1647
1648
  /**
1649
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1650
   *
1651
   * Gets multiple variables and optionally filters them
1652
   *
1653
   * @link  http://php.net/manual/en/function.filter-var-array.php
1654 1
   *
1655
   * @param array $data       <p>
1656 1
   *                          An array with string keys containing the data to filter.
1657 1
   *                          </p>
1658
   * @param mixed $definition [optional] <p>
1659
   *                          An array defining the arguments. A valid key is a string
1660 1
   *                          containing a variable name and a valid value is either a
1661
   *                          filter type, or an
1662 1
   *                          array optionally specifying the filter, flags and options.
1663 1
   *                          If the value is an array, valid keys are filter
1664 1
   *                          which specifies the filter type,
1665 1
   *                          flags which specifies any flags that apply to the
1666 1
   *                          filter, and options which specifies any options that
1667 1
   *                          apply to the filter. See the example below for a better understanding.
1668 1
   *                          </p>
1669 1
   *                          <p>
1670 1
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1671 1
   *                          input array are filtered by this filter.
1672 1
   *                          </p>
1673
   * @param bool  $add_empty  [optional] <p>
1674
   *                          Add missing keys as <b>NULL</b> to the return value.
1675
   *                          </p>
1676
   *
1677
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1678
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1679
   * the variable is not set.
1680
   * @since 5.2.0
1681
   */
1682 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // Forward only the arguments the caller really passed, so the native
    // function is called with a single argument when nothing else was given.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // hand the raw result to UTF8::filter() before returning it
    return self::filter($filtered);
  }
1692 1
1693 1
  /**
   * Tell whether a string fits into a "box" of a given number of characters.
   *
   * @param string $str      The string to measure.
   * @param int    $box_size The maximum number of characters allowed.
   *
   * @return bool true if the length reported by UTF8::strlen() is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1705
1706
  /**
   * Repair simple cases of broken UTF-8 via a literal search-and-replace.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Use this function if you received an UTF-8 string that was converted from Windows-1252 as if it
   * was ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with every key of the "brokenUtf8ToUtf8" map replaced by its value.</p>
   */
  public static function fix_simple_utf8($str)
  {
    // search / replace lists, built once and kept for later calls
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($searchCache === null) {
      $searchCache = array_keys(self::$brokenUtf8ToUtf8);
      $replaceCache = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($searchCache, $replaceCache, $str);
  }
1738
1739
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively); the keys are kept.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return mixed <p>The fixed string, or the array with every element fixed.</p>
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    // decode + re-encode again and again, until a pass does not change the value anymore
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($str));
    }

    return $str;
  }
1768
1769
  /**
   * Get the text direction of a single character.
   *
   * If the "intlChar" support flag is set, the direction class reported by "IntlChar::charDirection()"
   * is used. If that class is not one of the known LTR / RTL classes (or the flag is not set), the
   * code point of the character is looked up in a hard-coded table of right-to-left code points.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      // direction classes from "IntlChar"-Class
      $ltrClasses = array(0, 11, 12, 20);
      $rtlClasses = array(1, 13, 14, 15, 21);

      $direction = \IntlChar::charDirection($char);

      if (in_array($direction, $ltrClasses, true)) {
        return 'LTR';
      }

      if (in_array($direction, $rtlClasses, true)) {
        return 'RTL';
      }

      // unknown direction class: fall through to the table lookup below
    }

    $c = static::chr_to_decimal($char);

    // everything outside of 0x5be ... 0x10b7f is treated as left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Pick the table for the block the code point lives in:
    // $singles are compared strictly, $ranges are inclusive array(from, to) pairs.
    if (0x85e >= $c) {

      $singles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $singles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two tables (and not 0x200f): no right-to-left entry
      return 'LTR';

    }

    if (in_array($c, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1888
1889
  /**
   * Load a data file from "/data/*.php" (relative to the directory of this file).
   *
   * @param string $file <p>Base name of the data file, without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>The value returned by the included file; false if the file does not exist.</p>
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1906
1907
  /**
   * Alias of "UTF8::string_has_bom()": the call is forwarded unchanged.
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool <p>The result of UTF8::string_has_bom().</p>
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1920
1921
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+"
   * (matched case-insensitively), e.g. "00a7", "\u00a7" or "U+00a7".
   *
   * @param string $str <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int($str)
  {
    if (!$str) {
      return false;
    }

    // Only real hexadecimal digits are allowed: with a looser class like "[a-z0-9]" an input
    // such as "U+zzzz" would match and intval() would silently turn it into 0.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
1942 9
1943 9
  /**
   * Alias of "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string <p>The result of UTF8::html_entity_decode().</p>
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
1958
1959
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * If "mb_encode_numericentity()" is available it does the whole conversion; otherwise the
   * string is split via UTF8::split() and every character is encoded with
   * UTF8::single_chr_html_encode().
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // with "keepAsciiChars" only code points from 0x80 upwards are converted
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The conversion map is a list of (start, end, offset, mask) quadruples, so it must
      // contain a multiple of four elements: a stray fifth element is rejected by PHP >= 8.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2008
2009
  /**
2010
   * UTF-8 version of html_entity_decode()
2011
   *
2012
   * The reason we are not using html_entity_decode() by itself is because
2013
   * while it is not technically correct to leave out the semicolon
2014
   * at the end of an entity most browsers will still interpret the entity
2015
   * correctly. html_entity_decode() does not convert entities without
2016
   * semicolons, so we are left with our own little solution here. Bummer.
2017
   *
2018
   * Convert all HTML entities to their applicable characters
2019
   *
2020
   * INFO: opposite to UTF8::html_encode()
2021
   *
2022
   * @link http://php.net/manual/en/function.html-entity-decode.php
2023
   *
2024
   * @param string $str      <p>
2025
   *                         The input string.
2026
   *                         </p>
2027
   * @param int    $flags    [optional] <p>
2028
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2029
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2030
   *                         <table>
2031
   *                         Available <i>flags</i> constants
2032
   *                         <tr valign="top">
2033
   *                         <td>Constant Name</td>
2034
   *                         <td>Description</td>
2035
   *                         </tr>
2036
   *                         <tr valign="top">
2037
   *                         <td><b>ENT_COMPAT</b></td>
2038
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2039
   *                         </tr>
2040
   *                         <tr valign="top">
2041
   *                         <td><b>ENT_QUOTES</b></td>
2042
   *                         <td>Will convert both double and single quotes.</td>
2043
   *                         </tr>
2044
   *                         <tr valign="top">
2045
   *                         <td><b>ENT_NOQUOTES</b></td>
2046
   *                         <td>Will leave both double and single quotes unconverted.</td>
2047
   *                         </tr>
2048
   *                         <tr valign="top">
2049
   *                         <td><b>ENT_HTML401</b></td>
2050
   *                         <td>
2051
   *                         Handle code as HTML 4.01.
2052
   *                         </td>
2053
   *                         </tr>
2054
   *                         <tr valign="top">
2055
   *                         <td><b>ENT_XML1</b></td>
2056
   *                         <td>
2057
   *                         Handle code as XML 1.
2058
   *                         </td>
2059
   *                         </tr>
2060
   *                         <tr valign="top">
2061
   *                         <td><b>ENT_XHTML</b></td>
2062
   *                         <td>
2063
   *                         Handle code as XHTML.
2064
   *                         </td>
2065
   *                         </tr>
2066
   *                         <tr valign="top">
2067
   *                         <td><b>ENT_HTML5</b></td>
2068
   *                         <td>
2069
   *                         Handle code as HTML 5.
2070
   *                         </td>
2071
   *                         </tr>
2072
   *                         </table>
2073
   *                         </p>
2074
   * @param string $encoding [optional] <p>Encoding to use.</p>
2075
   *
2076
   * @return string <p>The decoded string.</p>
2077
   */
2078
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // strings with less than four bytes are returned untouched, examples: &; || &x;
    if (!isset($str[3])) {
      return $str;
    }

    // without any ampersand there is nothing to decode
    if (strpos($str, '&') === false) {
      return $str;
    }

    // neither a numeric entity start nor a semicolon: nothing to decode either
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Decodes one decimal entity, but keeps the entity as it is if it stands for a quote character.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // repeat until a pass does not change the string anymore (handles multiple encoded input)
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // decode numeric & UTF16 two byte entities
      // (the missing semicolon is appended to numeric entities first)
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2143
2144
  /**
2145
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2146
   *
2147
   * @link http://php.net/manual/en/function.htmlentities.php
2148
   *
2149
   * @param string $str           <p>
2150
   *                              The input string.
2151
   *                              </p>
2152
   * @param int    $flags         [optional] <p>
2153
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2154
   *                              invalid code unit sequences and the used document type. The default is
2155
   *                              ENT_COMPAT | ENT_HTML401.
2156
   *                              <table>
2157
   *                              Available <i>flags</i> constants
2158
   *                              <tr valign="top">
2159
   *                              <td>Constant Name</td>
2160
   *                              <td>Description</td>
2161
   *                              </tr>
2162
   *                              <tr valign="top">
2163
   *                              <td><b>ENT_COMPAT</b></td>
2164
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2165
   *                              </tr>
2166
   *                              <tr valign="top">
2167
   *                              <td><b>ENT_QUOTES</b></td>
2168
   *                              <td>Will convert both double and single quotes.</td>
2169
   *                              </tr>
2170
   *                              <tr valign="top">
2171
   *                              <td><b>ENT_NOQUOTES</b></td>
2172
   *                              <td>Will leave both double and single quotes unconverted.</td>
2173
   *                              </tr>
2174
   *                              <tr valign="top">
2175
   *                              <td><b>ENT_IGNORE</b></td>
2176
   *                              <td>
2177
   *                              Silently discard invalid code unit sequences instead of returning
2178
   *                              an empty string. Using this flag is discouraged as it
2179
   *                              may have security implications.
2180
   *                              </td>
2181
   *                              </tr>
2182
   *                              <tr valign="top">
2183
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2184
   *                              <td>
2185
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2186
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2187
   *                              </td>
2188
   *                              </tr>
2189
   *                              <tr valign="top">
2190
   *                              <td><b>ENT_DISALLOWED</b></td>
2191
   *                              <td>
2192
   *                              Replace invalid code points for the given document type with a
2193
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2194
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2195
   *                              instance, to ensure the well-formedness of XML documents with
2196
   *                              embedded external content.
2197
   *                              </td>
2198
   *                              </tr>
2199
   *                              <tr valign="top">
2200
   *                              <td><b>ENT_HTML401</b></td>
2201
   *                              <td>
2202
   *                              Handle code as HTML 4.01.
2203
   *                              </td>
2204
   *                              </tr>
2205
   *                              <tr valign="top">
2206
   *                              <td><b>ENT_XML1</b></td>
2207
   *                              <td>
2208
   *                              Handle code as XML 1.
2209
   *                              </td>
2210
   *                              </tr>
2211
   *                              <tr valign="top">
2212
   *                              <td><b>ENT_XHTML</b></td>
2213
   *                              <td>
2214
   *                              Handle code as XHTML.
2215
   *                              </td>
2216
   *                              </tr>
2217
   *                              <tr valign="top">
2218
   *                              <td><b>ENT_HTML5</b></td>
2219
   *                              <td>
2220
   *                              Handle code as HTML 5.
2221
   *                              </td>
2222
   *                              </tr>
2223
   *                              </table>
2224
   *                              </p>
2225
   * @param string $encoding      [optional] <p>
2226
   *                              Like <b>htmlspecialchars</b>,
2227
   *                              <b>htmlentities</b> takes an optional third argument
2228
   *                              <i>encoding</i> which defines encoding used in
2229
   *                              conversion.
2230
   *                              Although this argument is technically optional, you are highly
2231
   *                              encouraged to specify the correct value for your code.
2232 1
   *                              </p>
2233
   * @param bool   $double_encode [optional] <p>
2234 1
   *                              When <i>double_encode</i> is turned off PHP will not
2235
   *                              encode existing html entities. The default is to convert everything.
2236
   *                              </p>
2237
   *
2238 1
   *
2239
   * @return string the encoded string.
2240
   * </p>
2241
   * <p>
2242
   * If the input <i>string</i> contains an invalid code unit
2243
   * sequence within the given <i>encoding</i> an empty string
2244
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2245
   * <b>ENT_SUBSTITUTE</b> flags are set.
2246 1
   */
2247
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct character of three or more bytes that is still in the string
    // and replace it with its numbered entity.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $counter => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $counter);

      // UTF8::access() may signal an error with false: never use that as array key
      // or pass it on to UTF8::html_encode().
      if ($char === false || isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
2275
2276
  /**
2277
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2278
   *
2279 1
   * INFO: Take a look at "UTF8::htmlentities()"
2280
   *
2281 1
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2282
   *
2283
   * @param string $str           <p>
2284
   *                              The string being converted.
2285
   *                              </p>
2286
   * @param int    $flags         [optional] <p>
2287
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2288
   *                              invalid code unit sequences and the used document type. The default is
2289 2
   *                              ENT_COMPAT | ENT_HTML401.
2290
   *                              <table>
2291 2
   *                              Available <i>flags</i> constants
2292
   *                              <tr valign="top">
2293
   *                              <td>Constant Name</td>
2294
   *                              <td>Description</td>
2295
   *                              </tr>
2296
   *                              <tr valign="top">
2297
   *                              <td><b>ENT_COMPAT</b></td>
2298
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2299
   *                              </tr>
2300
   *                              <tr valign="top">
2301
   *                              <td><b>ENT_QUOTES</b></td>
2302
   *                              <td>Will convert both double and single quotes.</td>
2303 2
   *                              </tr>
2304
   *                              <tr valign="top">
2305 2
   *                              <td><b>ENT_NOQUOTES</b></td>
2306
   *                              <td>Will leave both double and single quotes unconverted.</td>
2307
   *                              </tr>
2308
   *                              <tr valign="top">
2309
   *                              <td><b>ENT_IGNORE</b></td>
2310
   *                              <td>
2311
   *                              Silently discard invalid code unit sequences instead of returning
2312
   *                              an empty string. Using this flag is discouraged as it
2313
   *                              may have security implications.
2314
   *                              </td>
2315
   *                              </tr>
2316
   *                              <tr valign="top">
2317 1
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2318
   *                              <td>
2319 1
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2320
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2321
   *                              </td>
2322
   *                              </tr>
2323
   *                              <tr valign="top">
2324
   *                              <td><b>ENT_DISALLOWED</b></td>
2325
   *                              <td>
2326
   *                              Replace invalid code points for the given document type with a
2327
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2328
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2329
   *                              instance, to ensure the well-formedness of XML documents with
2330
   *                              embedded external content.
2331
   *                              </td>
2332
   *                              </tr>
2333
   *                              <tr valign="top">
2334
   *                              <td><b>ENT_HTML401</b></td>
2335
   *                              <td>
2336
   *                              Handle code as HTML 4.01.
2337
   *                              </td>
2338
   *                              </tr>
2339
   *                              <tr valign="top">
2340
   *                              <td><b>ENT_XML1</b></td>
2341
   *                              <td>
2342
   *                              Handle code as XML 1.
2343
   *                              </td>
2344
   *                              </tr>
2345
   *                              <tr valign="top">
2346
   *                              <td><b>ENT_XHTML</b></td>
2347
   *                              <td>
2348
   *                              Handle code as XHTML.
2349
   *                              </td>
2350
   *                              </tr>
2351
   *                              <tr valign="top">
2352
   *                              <td><b>ENT_HTML5</b></td>
2353
   *                              <td>
2354
   *                              Handle code as HTML 5.
2355
   *                              </td>
2356
   *                              </tr>
2357
   *                              </table>
2358
   *                              </p>
2359 1
   * @param string $encoding      [optional] <p>
2360
   *                              Defines encoding used in conversion.
2361 1
   *                              </p>
2362
   *                              <p>
2363
   *                              For the purposes of this function, the encodings
2364
   *                              ISO-8859-1, ISO-8859-15,
2365
   *                              UTF-8, cp866,
2366
   *                              cp1251, cp1252, and
2367
   *                              KOI8-R are effectively equivalent, provided the
2368
   *                              <i>string</i> itself is valid for the encoding, as
2369
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2370
   *                              the same positions in all of these encodings.
2371
   *                              </p>
2372
   * @param bool   $double_encode [optional] <p>
2373
   *                              When <i>double_encode</i> is turned off PHP will not
2374
   *                              encode existing html entities, the default is to convert everything.
2375
   *                              </p>
2376
   *
2377
   * @return string The converted string.
2378
   * </p>
2379
   * <p>
2380
   * If the input <i>string</i> contains an invalid code unit
2381
   * sequence within the given <i>encoding</i> an empty string
2382
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2383
   * <b>ENT_SUBSTITUTE</b> flags are set.
2384
   */
2385
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Only non-default encodings are run through "UTF8::normalize_encoding()".
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2393
2394
  /**
2395
   * Checks whether iconv is available on the server.
2396
   *
2397
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2398
   */
2399
  public static function iconv_loaded()
  {
    $return = extension_loaded('iconv');

    // On PHP >= 5.6 the "default_charset" is already set by the "Bootup"-class,
    // so the iconv encodings only need to be set on older versions -- and only
    // when the extension is really there, otherwise "iconv_set_encoding()"
    // does not exist and calling it would be a fatal error.
    if ($return === true && !Bootup::is_php('5.6')) {
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $return;
  }
2413
2414
  /**
2415
   * Converts Integer to hexadecimal U+xxxx code point representation.
2416 16
   *
2417
   * INFO: opposite to UTF8::hex_to_int()
2418 16
   *
2419
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2420
   * @param string $pfix [optional]
2421
   *
2422
   * @return string <p>The code point, or empty string on failure.</p>
2423
   */
2424
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain sequence of decimal digits is rejected.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits (e.g. "41" => "0041").
    $hex = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $hex;
  }
2436 5
2437
  /**
2438
   * Checks whether intl-char is available on the server.
2439 28
   *
2440
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2441
   */
2442
  public static function intlChar_loaded()
  {
    // The class lookup is only performed when the version check passes.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2446
2447
  /**
2448
   * Checks whether intl is available on the server.
2449 1
   *
2450
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2451 1
   */
2452
  public static function intl_loaded()
  {
    // "extension_loaded()" already yields a boolean.
    return (bool)extension_loaded('intl');
  }
2456
2457 1
  /**
2458 1
   * alias for "UTF8::is_ascii()"
2459
   *
2460 1
   * @see UTF8::is_ascii()
2461
   *
2462
   * @param string $str
2463
   *
2464
   * @return boolean
2465
   */
2466
  public static function isAscii($str)
  {
    // Pure alias: forward the argument untouched to "UTF8::is_ascii()".
    $result = self::is_ascii($str);

    return $result;
  }
2470
2471 16
  /**
2472
   * alias for "UTF8::is_base64()"
2473
   *
2474 16
   * @see UTF8::is_base64()
2475
   *
2476
   * @param string $str
2477 16
   *
2478
   * @return bool
2479 16
   */
2480 16
  public static function isBase64($str)
  {
    // Pure alias: forward the argument untouched to "UTF8::is_base64()".
    $result = self::is_base64($str);

    return $result;
  }
2484
2485 15
  /**
2486
   * alias for "UTF8::is_binary()"
2487
   *
2488
   * @see UTF8::is_binary()
2489
   *
2490
   * @param string $str
2491
   *
2492
   * @return bool
2493
   */
2494
  public static function isBinary($str)
  {
    // Pure alias: forward the argument untouched to "UTF8::is_binary()".
    $result = self::is_binary($str);

    return $result;
  }
2498
2499
  /**
2500
   * alias for "UTF8::is_bom()"
2501
   *
2502
   * @see UTF8::is_bom()
2503
   *
2504
   * @param string $utf8_chr
2505
   *
2506
   * @return boolean
2507
   */
2508
  public static function isBom($utf8_chr)
  {
    // Pure alias: forward the argument untouched to "UTF8::is_bom()".
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2512
2513
  /**
2514
   * alias for "UTF8::is_html()"
2515
   *
2516
   * @see UTF8::is_html()
2517
   *
2518
   * @param string $str
2519
   *
2520
   * @return boolean
2521
   */
2522
  public static function isHtml($str)
  {
    // Pure alias: forward the argument untouched to "UTF8::is_html()".
    $result = self::is_html($str);

    return $result;
  }
2526
2527
  /**
2528
   * alias for "UTF8::is_json()"
2529
   *
2530
   * @see UTF8::is_json()
2531
   *
2532
   * @param string $str
2533
   *
2534
   * @return bool
2535
   */
2536 1
  public static function isJson($str)
  {
    // Pure alias: forward the argument untouched to "UTF8::is_json()".
    $result = self::is_json($str);

    return $result;
  }
2540 1
2541
  /**
2542
   * alias for "UTF8::is_utf16()"
2543
   *
2544
   * @see UTF8::is_utf16()
2545 1
   *
2546
   * @param string $str
2547 1
   *
2548
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2549 1
   */
2550 1
  public static function isUtf16($str)
  {
    // Pure alias: forward the argument untouched to "UTF8::is_utf16()".
    $result = self::is_utf16($str);

    return $result;
  }
2554
2555
  /**
2556
   * alias for "UTF8::is_utf32()"
2557
   *
2558
   * @see UTF8::is_utf32()
2559
   *
2560
   * @param string $str
2561
   *
2562
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2563 1
   */
2564
  public static function isUtf32($str)
  {
    // Pure alias: forward the argument untouched to "UTF8::is_utf32()".
    $result = self::is_utf32($str);

    return $result;
  }
2568
2569
  /**
2570
   * alias for "UTF8::is_utf8()"
2571
   *
2572 1
   * @see UTF8::is_utf8()
2573 1
   *
2574 1
   * @param string $str
2575 1
   * @param bool   $strict
2576 1
   *
2577
   * @return bool
2578 1
   */
2579
  public static function isUtf8($str, $strict = false)
  {
    // Pure alias: forward both arguments untouched to "UTF8::is_utf8()".
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2583
2584
  /**
2585
   * Checks if a string is 7 bit ASCII.
2586
   *
2587
   * @param string $str <p>The string to check.</p>
2588
   *
2589
   * @return bool <p>
2590
   *              <strong>true</strong> if it is ASCII<br />
2591
   *              <strong>false</strong> otherwise
2592
   *              </p>
2593 4
   */
2594
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // The empty string is treated as ASCII.
    if ($str === '') {
      return true;
    }

    // ASCII means: no byte with the high bit set anywhere in the string.
    $hasHighBitByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighBitByte;
  }
2604 4
2605 4
  /**
2606 4
   * Returns true if the string is base64 encoded, false otherwise.
2607 4
   *
2608 2
   * @param string $str <p>The input string.</p>
2609 2
   *
2610 4
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2611 4
   */
2612 4
  public static function is_base64($str)
  {
    $str = (string)$str;

    // The empty string is never reported as base64.
    if ($str === '') {
      return false;
    }

    // Strictly decode and encode again: only input that survives this
    // round trip byte-for-byte is reported as base64.
    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
2626 4
2627 4
  /**
2628
   * Check if the input looks like binary data (heuristic; this is a bit of a hack).
2629 4
   *
2630 3
   * @param mixed $input
2631 2
   *
2632
   * @return bool
2633 3
   */
2634
  public static function is_binary($input)
  {
    $inputLength = strlen($input);

    // A string made up only of "0" and "1" is reported as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary.
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    // NOTE(review): this counts occurrences of the literal 4-character string
    // '^ -~', not of a character class -- presumably a regex was intended;
    // confirm before changing, since callers depend on the current result.
    if ($inputLength && substr_count($input, '^ -~') / $inputLength > 0.3) {
      return true;
    }

    return false;
  }
2651
2652
  /**
2653 3
   * Check if the file is binary.
2654
   *
2655 3
   * @param string $file
2656
   *
2657 3
   * @return boolean
2658
   */
2659 3
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'r');

      // "fopen()" signals failure by returning false (not by throwing),
      // so the handle must be checked before reading from it.
      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $data = fread($fp, 512);
        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // Best effort: an unreadable file is treated like empty content.
      $block = '';
    }

    // Release the handle on every path, including after an exception.
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2671 3
2672 3
  /**
2673
   * Checks if the given string is equal to any "Byte Order Mark".
2674 3
   *
2675 3
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2676 3
   *
2677 3
   * @param string $str <p>The input string.</p>
2678 3
   *
2679 3
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2680 3
   */
2681 3
  public static function is_bom($str)
  {
    // The known BOM byte strings are the keys of "self::$bom"; the input
    // must be strictly identical to one of them.
    $knownBoms = array_keys(self::$bom);

    return in_array($str, $knownBoms, true);
  }
2691 1
2692
  /**
2693 1
   * Check if the string contains any html-tags <lall>.
2694
   *
2695
   * @param string $str <p>The input string.</p>
2696
   *
2697 3
   * @return boolean
2698
   */
2699 3
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Look for one opening, closing or self-closing tag, optionally carrying
    // attributes; a single hit is enough.
    $found = preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str);

    return $found === 1;
  }
2718
2719
  /**
2720 41
   * Try to check if "$str" is a JSON string.
2721 1
   *
2722 1
   * @param string $str <p>The input string.</p>
2723
   *
2724
   * @return bool
2725
   */
2726
  public static function is_json($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = self::json_decode($str);

    // Both JSON objects ("{...}") and JSON arrays ("[...]") count as JSON;
    // a JSON array decodes to a PHP array, so checking "is_object()" alone
    // would reject valid input such as "[1,2,3]". Bare scalars are still
    // not reported as JSON.
    return (is_object($decoded) || is_array($decoded))
           &&
           json_last_error() === JSON_ERROR_NONE;
  }
2744 41
2745
  /**
2746
   * Check if the string is UTF-16.
2747 41
   *
2748 41
   * @param string $str <p>The input string.</p>
2749 41
   *
2750
   * @return int|false <p>
2751
   *                   <strong>false</strong> if it's not UTF-16,<br />
2752 41
   *                   <strong>1</strong> for UTF-16LE,<br />
2753
   *                   <strong>2</strong> for UTF-16BE.
2754 36
   *                   </p>
2755 41
   */
2756 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that "UTF8::is_binary()" reports as binary is examined.
    if (self::is_binary($str)) {
      $maybeUTF16LE = self::utf_round_trip_score($str, 'UTF-16LE');
      $maybeUTF16BE = self::utf_round_trip_score($str, 'UTF-16BE');

      // A tie gives no evidence for either byte order.
      if ($maybeUTF16BE !== $maybeUTF16LE) {
        return ($maybeUTF16LE > $maybeUTF16BE) ? 1 : 2;
      }
    }

    return false;
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it's not UTF-32,<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that "UTF8::is_binary()" reports as binary is examined.
    if (self::is_binary($str)) {
      $maybeUTF32LE = self::utf_round_trip_score($str, 'UTF-32LE');
      $maybeUTF32BE = self::utf_round_trip_score($str, 'UTF-32BE');

      // A tie gives no evidence for either byte order.
      if ($maybeUTF32BE !== $maybeUTF32LE) {
        return ($maybeUTF32LE > $maybeUTF32BE) ? 1 : 2;
      }
    }

    return false;
  }

  /**
   * Shared scoring step of "UTF8::is_utf16()" and "UTF8::is_utf32()".
   *
   * Converts $str from $encoding to UTF-8, back to $encoding and to UTF-8
   * again. If both UTF-8 results are identical, every key of
   * "UTF8::count_chars()" of that result which is found (strict comparison)
   * in "UTF8::count_chars()" of the raw input adds one to the score.
   *
   * @param string $str      <p>The input string (BOM already removed).</p>
   * @param string $encoding <p>The candidate encoding, e.g. "UTF-16LE".</p>
   *
   * @return int <p>The score; 0 if the conversion fails or does not round-trip.</p>
   */
  private static function utf_round_trip_score($str, $encoding)
  {
    $score = 0;

    $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if (!$decoded) {
      return $score;
    }

    $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
    $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
    if ($roundTrip !== $decoded) {
      return $score;
    }

    $strChars = self::count_chars($str, true);
    foreach (self::count_chars($roundTrip, true) as $char => $unused) {
      if (in_array($char, $strChars, true) === true) {
        $score++;
      }
    }

    return $score;
  }
2864
2865
  /**
2866
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2867
   *
2868
   * @see    http://hsivonen.iki.fi/php-utf8/
2869
   *
2870
   * @param string $str    <p>The string to be checked.</p>
2871
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2872
   *
2873
   * @return bool
2874
   */
2875
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is reported as valid.
    if (!isset($str[0])) {
      return true;
    }

    // In strict mode anything detected as UTF-16 / UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this branch runs a "/u" pattern exactly when
    // "UTF8::pcre_utf8_support()" does NOT report true, which looks inverted;
    // confirm against that helper before changing it.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets announced by the lead octet
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Outside of a sequence: expect US-ASCII or a lead octet.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence: always illegal (either not the
          // shortest form or outside 0-0x10FFFF). Instead of resynchronizing,
          // carry on to the end of the sequence and let the checks below
          // reject it.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a sequence.
          return false;
        }

        continue;
      }

      // Inside a sequence: only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // Sequence complete: $mUcs4 holds the final code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A sequence that is still open when the input ends was truncated and
    // is therefore invalid.
    return $mState === 0;
  }
3009 1
3010 1
  /**
3011 1
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3012
   * Decodes a JSON string
3013 1
   *
3014
   * @link http://php.net/manual/en/function.json-decode.php
3015
   *
3016
   * @param string $json    <p>
3017
   *                        The <i>json</i> string being decoded.
3018
   *                        </p>
3019
   *                        <p>
3020
   *                        This function only works with UTF-8 encoded strings.
3021
   *                        </p>
3022 2
   *                        <p>PHP implements a superset of
3023
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3024 2
   *                        only supports these values when they are nested inside an array or an object.
3025
   *                        </p>
3026 2
   * @param bool   $assoc   [optional] <p>
3027 2
   *                        When <b>TRUE</b>, returned objects will be converted into
3028 2
   *                        associative arrays.
3029
   *                        </p>
3030 2
   * @param int    $depth   [optional] <p>
3031
   *                        User specified recursion depth.
3032
   *                        </p>
3033
   * @param int    $options [optional] <p>
3034
   *                        Bitmask of JSON decode options. Currently only
3035
   *                        <b>JSON_BIGINT_AS_STRING</b>
3036
   *                        is supported (default is to cast large integers as floats)
3037
   *                        </p>
3038
   *
3039
   * @return mixed the value encoded in <i>json</i> in appropriate
3040 1
   * PHP type. Values true, false and
3041
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3042 1
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3043
   * <i>json</i> cannot be decoded or if the encoded
3044
   * data is deeper than the recursion limit.
3045
   */
3046 1
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $json = self::filter($json);

    // The "$options" argument is only handed to the native function when
    // the version check for PHP 5.4 passes.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($json, $assoc, $depth);
    }

    return json_decode($json, $assoc, $depth, $options);
  }
3058 1
3059
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through self::filter() first and is then handed to
   * the native json_encode().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of the native JSON_* encode constants, e.g.
   *                       <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Maximum depth, must be greater than zero. Only forwarded
   *                       when Bootup reports PHP >= 5.5.
   *                       </p>
   *
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // Older PHP versions do not know the third "$depth" argument.
    if (Bootup::is_php('5.5')) {
      return json_encode($filtered, $options, $depth);
    }

    return json_encode($filtered, $options);
  }
3107 4
3108 4
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst($str)
  {
    // self::substr() can hand back false instead of a string; cast both
    // pieces so self::strtolower() and the concatenation always see strings.
    $firstChar = (string)self::substr($str, 0, 1);
    $remainder = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $remainder;
  }
3119
3120
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without explicit "$chars" every leading character of the Unicode
   * categories "separator" (\pZ) and "other" (\pC) is removed.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>Optional characters to be stripped</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP but is a perfectly valid character to strip,
    // so it must not fall into the "no chars given" default branch.
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3143
3144
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point.</p>
   */
  public static function max($arg)
  {
    // An array of strings is simply glued together before it is inspected.
    $haystack = is_array($arg) ? implode('', $arg) : $arg;

    $highestCodePoint = max(self::codepoints($haystack));

    return self::chr($highestCodePoint);
  }
3159 2
3160
  /**
   * Determines the largest byte length of any single character
   * in the given UTF-8 string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if there are no chars.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    // Nothing to measure.
    if (count($sizes) <= 0) {
      return 0;
    }

    return (int)max($sizes);
  }
3177 18
3178
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal
   * encoding is switched to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3193
3194
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point.</p>
   */
  public static function min($arg)
  {
    // An array of strings is simply glued together before it is inspected.
    $haystack = is_array($arg) ? implode('', $arg) : $arg;

    $lowestCodePoint = min(self::codepoints($haystack));

    return self::chr($lowestCodePoint);
  }
3209
3210
  /**
   * Alias of "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   *
   * @return string <p>Whatever UTF8::normalize_encoding() returns for the input.</p>
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3223
3224
  /**
   * Normalize the encoding-"name" input.
   *
   * "UTF-8" and every name listed in self::$iconvEncoding are returned
   * untouched. Everything else is upper-cased and, if it matches one of
   * the known aliases, mapped to its canonical name. Results are memoized
   * per input string.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>
   *                      e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.,<br />
   *                      <strong>false</strong> if the given encoding is empty.
   *                      </p>
   */
  public static function normalize_encoding($encoding)
  {
    static $resolvedNames = array();

    if (!$encoding) {
      return false;
    }

    // Fast paths: nothing to normalize for already well-known names.
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($resolvedNames[$encoding])) {
      return $resolvedNames[$encoding];
    }

    $upperCased = strtoupper($encoding);

    // Alias lookup ignores everything except letters, digits and whitespace.
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // Unknown names are returned in their upper-cased form.
    $normalized = empty($aliases[$aliasKey]) ? $upperCased : $aliases[$aliasKey];

    $resolvedNames[$encoding] = $normalized;

    return $normalized;
  }
3280
3281
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$utf8MSWord found in the string is replaced by the
   * corresponding value of that table.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // Search/replace lists are derived from the table only once per request.
    static $searchList = null;
    static $replaceList = null;

    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    if ($searchList === null) {
      $searchList = array_keys(self::$utf8MSWord);
      $replaceList = array_values(self::$utf8MSWord);
    }

    return str_replace($searchList, $replaceList, $text);
  }
3307
3308
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable is replaced by a plain space and,
   * unless requested otherwise, every entry of self::$bidiUniCodeControlsTable
   * is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // The NBSP entry is only dropped for a strict boolean true, so the cache
    // key has to use the very same strict test. Otherwise a first call with a
    // truthy non-bool (e.g. 1) would store the full table in slot 1 and every
    // later call with true would wrongly replace non-breaking spaces.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3354
3355
  /**
   * Format a number with grouped thousands.
   *
   * Separators longer than one byte (e.g. multi-byte UTF-8 characters) are
   * supported on every PHP version: the number is formatted with the plain
   * "." / "," separators first and both are then swapped in a single pass.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // Either separator being longer than one byte is enough to need the
    // workaround. strtr() with an array replaces both placeholders
    // simultaneously, so a "," or "." inside one of the separators is
    // never replaced a second time.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3392
3393
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * When the "intlChar" support flag is set, \IntlChar::ord() is tried first.
   * Otherwise (or if it yields nothing) the leading bytes are decoded by
   * hand and the result is kept in a static per-character cache.
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 for an empty input.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    if (!$chr && $chr !== '0') {
      return 0;
    }

    // Foreign encodings are converted to UTF-8 before decoding.
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
      $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $viaIntl = \IntlChar::ord($chr);
      if ($viaIntl) {
        return $viaIntl;
      }
    }

    // use static cache, if there is no support for "IntlChar"
    static $decoded = array();
    if (isset($decoded[$chr])) {
      return $decoded[$chr];
    }

    // At most four bytes make up one UTF-8 character; unpack() indexes from 1.
    $bytes = unpack('C*', substr($chr, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing decodable): the lead byte is the result
      $codePoint = $lead;
    }

    return $decoded[$chr] = $codePoint;
  }
3453 45
3454
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()" this method never (re-)places variables in the current scope;
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str    <p>The input string.</p>
   * @param array  $result <p>The result will be returned into this reference parameter.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or the $result is empty.</p>
   */
  public static function parse_str($str, &$result)
  {
    // The input is run through self::clean() before it is parsed.
    $cleaned = self::clean($str);

    $parsed = \mb_parse_str($cleaned, $result);

    return $parsed !== false && !empty($result);
  }
3479
3480 23
  /**
   * Checks if the "u" pattern modifier (Unicode support in PCRE) is available.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // Warnings of the probe are silenced on purpose; only the outcome matters.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3490
3491 18
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each bound is resolved to a code point: a digits-only value is taken as
   * decimal, a value made of hex digits goes through self::hex_to_int() and
   * anything else through self::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, an empty array if a bound is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $bounds = array();

    // Start and end are resolved by exactly the same rules, one after the other.
    foreach (array($var1, $var2) as $bound) {
      if (ctype_digit((string)$bound)) {
        $codePoint = (int)$bound;
      } elseif (ctype_xdigit($bound)) {
        $codePoint = (int)self::hex_to_int($bound);
      } else {
        $codePoint = self::ord($bound);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3537 18
3538
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever UTF8::remove_bom() returns for the input.</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3551
3552
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$bom (BOM string => byte length) that the string
   * starts with is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // On PHP < 7 substr() returns false when the string consists of
        // nothing but the BOM; the cast keeps the documented string result.
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3569 1
3570 1
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated "$what" is collapsed into a single
   * occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // "$1" re-inserts exactly one captured copy of the item. Passing the
        // item itself as replacement would let sequences like "$0" or "\1"
        // inside it be interpreted as back-references.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3593
3594
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * The replacement is repeated until nothing matches any more, so
   * "$replacement" must not itself contain one of the removed characters.
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url encoded form ("%00" etc.) of the characters.</p>
   * @param string $replacement <p>The text every removed character is replaced with.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // Every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09).
    if ($url_encoded) {
      // Percent-encoding is case-insensitive ("%0B" equals "%0b"), hence the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127, matches the raw \x7F below
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Removing a character can form a new match (e.g. "%0%000"), so loop
    // until a pass does not replace anything.
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3627
3628
  /**
   * Replace the diamond question mark (�) with the replacement.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>The text that is put in place of every diamond question mark.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // Escaped byte form and the literal character are both searched for.
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // One replacement entry per needle.
    $replacements = array_fill(0, count($needles), $unknown);

    return str_replace($needles, $replacements, $str);
  }
3650 5
3651
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without explicit "$chars" every trailing character of the Unicode
   * categories "separator" (\pZ) and "other" (\pC) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>Optional characters to be stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP but is a perfectly valid character to strip,
    // so it must not fall into the "no chars given" default branch.
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3674
3675
  /**
   * Builds a regex fragment that matches any of the characters in "$s".
   *
   * Characters go into one bracket expression; anything that
   * self::str_split() hands back as more than one character becomes a
   * separate alternative. Results are memoized per "$s" / "$class" pair.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>The regex fragment (without delimiters or quantifier).</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $memo = array();

    $memoKey = $s . $class;

    if (isset($memo[$memoKey])) {
      return $memo[$memoKey];
    }

    // Index 0 collects the bracket expression, further entries are alternatives.
    $parts = array($class);

    foreach (self::str_split($s) as $piece) {
      if ('-' === $piece) {
        // A hyphen is moved to the front, where it is a literal inside brackets.
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($piece[2])) {
        // At most two bytes long: escape regex meta characters.
        $parts[0] .= preg_quote($piece, '/');
        continue;
      }

      if (1 === self::strlen($piece)) {
        $parts[0] .= $piece;
        continue;
      }

      $parts[] = $piece;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    return $memo[$memoKey] = $pattern;
  }
3723 6
3724 6
  /**
   * WARNING: Echoes every entry of the native UTF8-support list, e.g. for debugging.
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportFlag) {
      // one entry per line, in plain text and in HTML
      echo $supportFlag . "\n<br>";
    }
  }
3733 6
3734 6
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, an empty string for an empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII characters are handed back untouched on request.
    if ($keepAsciiChars === true && self::isAscii($char) === true) {
      return $char;
    }

    $sourceEncoding = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    return '&#' . self::ord($char, $sourceEncoding) . ';';
  }
3766
3767
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split by a regex, otherwise the
   * bytes are walked by hand; in that fallback, bytes that do not form a
   * complete UTF-8 sequence are left out of the result.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string
   *                           (only applied on the PCRE code path).</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $chars = array();

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // "s" lets the dot match newlines too, "u" makes it match whole characters.
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: classify each lead byte by its high bits

      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; ++$i) {
        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {
          // single byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$i + 1])) {
          // two byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];

            ++$i;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$i + 2])) {
          // three byte sequence
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$i + 3])) {
          // four byte sequence
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80") && (($str[$i + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }
        }
      }
    }

    // Group the single characters into chunks of "$length" characters.
    if ($length > 1) {
      return array_map(
          function ($chunk) {
            return implode('', $chunk);
          },
          array_chunk($chars, $length)
      );
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
3851 2
3852
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order, the first hit wins:
   * binary (UTF-16 / UTF-32), ASCII, UTF-8, "\mb_detect_encoding()", "iconv()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {

      // run each detection only once and re-use the result for both comparisons
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the string from that encoding
    // into the same encoding leaves it unchanged (compared via md5-hash).

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp, $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
3944
3945
  /**
   * Check if the string ends with the given substring.
   *
   * INFO: an empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>true</strong> if the haystack ends with the needle, <strong>false</strong> otherwise.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many characters from the end of the haystack as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
3968
3969
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * INFO: an empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>true</strong> if the haystack ends with the needle, <strong>false</strong> otherwise.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // "self::substr()" can also return false -> always hand over a string to "self::strcasecmp()"
    $tail = (string)self::substr($haystack, -self::strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
3992
3993
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * INFO: The search values are converted into quoted, case-insensitive regular expressions
   *       and the replacement values are escaped, so that "$1" or "\1" in a replacement
   *       are inserted literally (like "\str_ireplace()" does) and not as back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value, or an array of replacement values.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one regex per search value (no by-reference loop -> no dangling reference)
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // an empty needle must never match: this pattern can't match anything
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // escape "\" and "$", otherwise "preg_replace()" would treat e.g. "$1" as back-reference
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $replaceTmp) {
        $replacement[] = addcslashes((string)$replaceTmp, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    return preg_replace($patterns, $replacement, $subject, -1, $count);
  }
4036
4037
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * INFO: an empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>true</strong> if the needle is found at position 0, <strong>false</strong> otherwise.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a match only counts if it is located at the very beginning
    return self::stripos($haystack, $needle) === 0;
  }
4060
4061
  /**
   * Limit the number of characters in a string, but also after the next word.
   *
   * The string is cut at "$length" characters, the last (possibly incomplete) word
   * is dropped and "$strAddOn" is appended. Strings which are not longer than
   * "$length" are returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Max. number of characters to keep.</p>
   * @param string $strAddOn [optional] <p>Appended to the string, if it was shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit is located directly after a word -> cut off the space and we are done
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // "self::substr()" can also return false -> keep on working with a string
    $shortened = (string)self::substr($str, 0, $length);

    // drop the last (possibly incomplete) word
    $words = explode(' ', $shortened);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord === '') {
      // there was only a single word: cut it hard
      return self::substr($shortened, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4101 1
4102 1
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * INFO: The input is returned unchanged, if "$pad_length" is not an integer,
   *       is not greater than 0, is smaller than the length of the input
   *       or if "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad-string can't pad anything (and would trigger a division by zero below)
    if ($ps_length < 1) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the rounded down half, the right side gets the rest
        $left = (int)($diff / 2);
        $right = $diff - $left;

        // "$right" is never smaller than "$left" -> one repeated string is enough for both sides
        $padding = str_repeat($pad_string, (int)ceil($right / $ps_length));

        $pre = $left > 0 ? self::substr($padding, 0, $left) : '';
        $post = $right > 0 ? self::substr($padding, 0, $right) : '';
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4156
4157
  /**
   * Repeat a string.
   *
   * INFO: The input is passed through "UTF8::filter()", before it is repeated via "\str_repeat()".
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4181
4182 9
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // "$count" is filled by the native function and handed back via reference
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4215
4216
  /**
   * Shuffles all the characters in the string.
   *
   * INFO: The string is split via "UTF8::split()" and the resulting
   *       array is shuffled via "\shuffle()".
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);

    shuffle($chars);

    return implode('', $chars);
  }
4231
4232 2
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if ($desc) {
      arsort($codePoints);
    } else {
      asort($codePoints);
    }

    // convert the sorted code points back into a string
    return self::string($codePoints);
  }
4257
4258 3
  /**
   * Split a string into an array.
   *
   * INFO: The string is split via the grapheme cluster regex of the "Grapheme"-class,
   *       a length smaller than 1 is passed on to the native "\str_split()".
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len [optional] <p>Number of matched elements per chunk.</p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    $str = (string)$str;
    $len = (int)$len;

    if ($str === '') {
      return array();
    }

    // let the native function deal with an invalid length
    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    // glue "$len" matches together into one chunk
    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len) {
        $chunks[$chunkIndex] .= $grapheme;
      } else {
        // first element of a new chunk
        $chunks[++$chunkIndex] = $grapheme;
      }
    }

    return $chunks;
  }
4302
4303
  /**
   * Check if the string starts with the given substring.
   *
   * INFO: an empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>true</strong> if the needle is found at position 0, <strong>false</strong> otherwise.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a match only counts if it is located at the very beginning
    return self::strpos($haystack, $needle) === 0;
  }
4326 1
4327
  /**
   * Get a binary representation of a specific string.
   *
   * INFO: Every byte is converted on its own, because "\base_convert()" loses
   *       precision on large numbers and would return wrong bits for longer strings.
   *       Leading zeros are stripped, so that the result is what "\base_convert()"
   *       returns for short strings, e.g. "a" => "1100001" and "" => "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The bits of all bytes of the string, without leading zeros.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      // 8 bits per byte
      $bits .= str_pad(decbin(ord($str[$i])), 8, '0', STR_PAD_LEFT);
    }

    $bits = ltrim($bits, '0');

    // empty input or only null-bytes
    if ($bits === '') {
      return '0';
    }

    return $bits;
  }
4342
4343
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Passed on to "UTF8::to_ascii()".</p>
   * @param bool   $strict  [optional] <p>Passed on to "UTF8::to_ascii()".</p>
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4358 2
4359
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');

    // the words are captured as delimiters, so the result alternates between
    // "non-word" parts (even index) and words (odd index)
    $parts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    $partCount = count($parts);

    if ($format === 1) {
      // list of words
      $words = array();
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[] = $parts[$index];
      }

      return $words;
    }

    if ($format === 2) {
      // list of words, the key is the character-offset of the word
      $words = array();
      $position = self::strlen($parts[0]);
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[$position] = $parts[$index];
        $position += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
      }

      return $words;
    }

    // only the number of words
    return ($partCount - 1) / 2;
  }
4403
4404
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(), both strings are
   *       passed through UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4422
4423
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack      <p>The string to search in.</p>
   * @param string  $needle        <p>The string to search for.</p>
   * @param bool    $before_needle [optional] <p>Passed on to "UTF8::strstr()".</p>
   * @param string  $encoding      [optional] <p>Passed on to "UTF8::strstr()".</p>
   * @param boolean $cleanUtf8     [optional] <p>Passed on to "UTF8::strstr()".</p>
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4440
4441
  /**
   * Case-sensitive string comparison.
   *
   * INFO: Identical strings are detected directly, otherwise both strings
   *       are normalized (NFD) before they are compared via "\strcmp()".
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // shortcut: no normalization needed for identical strings
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4461
4462
  /**
   * Find length of initial segment not matching mask.
   *
   * INFO: <strong>null</strong> is returned for an empty "$charList" and
   *       if there is nothing left of the string after applying offset / length.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters to look for.</p>
   * @param int    $offset   [optional] <p>Start position of the examined part of the string.</p>
   * @param int    $length   [optional] <p>Length of the examined part of the string.</p>
   *
   * @return int|null
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only cut the string, if offset or length differ from the defaults
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazily capture everything up to the first char from the list
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no char from the list was found
    return self::strlen($str);
  }
4494
4495
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack      <p>The string to search in.</p>
   * @param string  $needle        <p>The string to search for.</p>
   * @param bool    $before_needle [optional] <p>Passed on to "UTF8::stristr()".</p>
   * @param string  $encoding      [optional] <p>Passed on to "UTF8::stristr()".</p>
   * @param boolean $cleanUtf8     [optional] <p>Passed on to "UTF8::stristr()".</p>
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4512 1
4513 1
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints(), every element is converted via UTF8::chr()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $callback = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    // one character per code point
    $chars = array_map($callback, $array);

    return implode('', $chars);
  }
4535 15
4536
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * INFO: The string is compared with every key of the "self::$bom" list.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // only the keys (the BOM strings) are needed here
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4553 14
4554
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               You can use the optional second parameter to specify tags which should
   *                               not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8, before the tags are stripped by the native function
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
4580
4581
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching
   *                           </p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found.
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // the native functions expect an integer: "null" (the default) means "start at position 0"
    $offset = (int)$offset;

    // make sure the support-list is initialized, before it is used
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$support['intl'] === true
        &&
        Bootup::is_php('5.4')
    ) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4644
4645 4
  /**
4646
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
4647 4
   *
4648 2
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
4649
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
4650
   * @param bool    $before_needle [optional] <p>
4651 3
   *                               If <b>TRUE</b>, this method returns the part of the
4652
   *                               haystack before the first occurrence of the needle (excluding the needle).
4653
   *                               </p>
4654
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4655
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
4656
   *
4657
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
4658
   */
4659
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Case-insensitive search: returns the part of $haystack starting at the first
    // occurrence of $needle (or the part before it, if $before_needle is set),
    // false if there is no match.
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // "self::$support" is read below, so make sure it was filled first
    // (same guard as in e.g. "strpos()" and "strrpos()")
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8' // INFO: use "mb_"-function (with polyfill) also if we need another encoding
        ||
        self::$support['mbstring'] === true
    ) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // fallback via regex: group 1 lazily captures everything in front of the first match
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip as many characters as were captured in front of the needle
    return self::substr($haystack, self::strlen($match[1]));
  }
4700
4701
  /**
4702
   * Get the string length, not the byte-length!
4703
   *
4704
   * @link     http://php.net/manual/en/function.mb-strlen.php
4705
   *
4706
   * @param string  $str       <p>The string being checked for length.</p>
4707 1
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4708
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4709 1
   *
4710
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
4711
   *             character counted as +1)</p>
4712
   */
4713
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Counts the characters (not the bytes) of $str, trying mbstring, iconv, intl
    // and finally a regex before falling back to the "mb_"-polyfill.
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // for these encodings the byte-length is returned directly
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // "self::$support" is read below, so make sure it was filled first
    // (same guard as in e.g. "strpos()" and "strrpos()")
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8' // INFO: use "mb_"-function (with polyfill) also if we need another encoding
        ||
        self::$support['mbstring'] === true
    ) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$support['iconv'] === true) {
      $returnTmp = \iconv_strlen($str, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strlen($str);
      if ($returnTmp !== null) {
        return $returnTmp;
      }
    }

    // fallback via regex: count every match of "any character" in UTF-8 mode
    preg_match_all('/./us', $str, $parts);
    $returnTmp = count($parts[0]);
    if ($returnTmp !== 0) {
      return $returnTmp;
    }

    // fallback to "mb_"-function via polyfill
    // (pass the encoding explicitly, so the result does not depend on the internal encoding)
    return \mb_strlen($str, $encoding);
  }
4772
4773
  /**
4774 10
   * Case insensitive string comparisons using a "natural order" algorithm.
4775 10
   *
4776 10
   * INFO: natural order version of UTF8::strcasecmp()
4777 10
   *
4778
   * @param string $str1 <p>The first string.</p>
4779
   * @param string $str2 <p>The second string.</p>
4780
   *
4781
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
4782
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
4783
   *             <strong>0</strong> if they are equal
4784
   */
4785
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both operands first, then delegate to the natural-order comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4789
4790
  /**
4791
   * String comparisons using a "natural order" algorithm
4792
   *
4793
   * INFO: natural order version of UTF8::strcmp()
4794
   *
4795
   * @link  http://php.net/manual/en/function.strnatcmp.php
4796
   *
4797
   * @param string $str1 <p>The first string.</p>
4798
   * @param string $str2 <p>The second string.</p>
4799
   *
4800
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
4801
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
4802
   *             <strong>0</strong> if they are equal
4803
   */
4804
  public static function strnatcmp($str1, $str2)
  {
    // identical strings are equal, no folding needed
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    // otherwise compare the "natfold"-ed versions with the native natural-order algorithm
    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4808
4809
  /**
4810
   * Case-insensitive string comparison of the first n characters.
4811
   *
4812
   * @link  http://php.net/manual/en/function.strncasecmp.php
4813 10
   *
4814
   * @param string $str1 <p>The first string.</p>
4815
   * @param string $str2 <p>The second string.</p>
4816 10
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
4817 10
   *
4818
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
4819 10
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
4820 2
   *             <strong>0</strong> if they are equal
4821 2
   */
4822
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both operands first, then compare only their first $len characters
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4826
4827
  /**
4828 8
   * String comparison of the first n characters.
4829
   *
4830
   * @link  http://php.net/manual/en/function.strncmp.php
4831
   *
4832
   * @param string $str1 <p>The first string.</p>
4833
   * @param string $str2 <p>The second string.</p>
4834
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
4835
   *
4836
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
4837
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
4838
   *             <strong>0</strong> if they are equal
4839
   */
4840
  public static function strncmp($str1, $str2, $len)
  {
    // Compare only the first $len characters of both strings.
    // "self::substr()" can also return false, so both prefixes are
    // normalized to strings before they are handed to "self::strcmp()".
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
4847 2
4848
  /**
4849
   * Search a string for any of a set of characters.
4850
   *
4851
   * @link  http://php.net/manual/en/function.strpbrk.php
4852
   *
4853
   * @param string $haystack  <p>The string where char_list is looked for.</p>
4854 2
   * @param string $char_list <p>This parameter is case sensitive.</p>
4855 1
   *
4856 1
   * @return string|false <p>The string starting from the character found, or false if it is not found.</p>
4857
   */
4858
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // nothing to search in, or nothing to search for
    if (!isset($haystack[0]) || !isset($char_list[0])) {
      return false;
    }

    // find the first character of $haystack that is part of the character class
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $found)) {
      return false;
    }

    // return everything from the first occurrence of the matched character
    $bytePos = strpos($haystack, $found[0]);

    return substr($haystack, $bytePos);
  }
4873
4874
  /**
4875
   * Find position of first occurrence of string in a string.
4876
   *
4877
   * @link http://php.net/manual/en/function.mb-strpos.php
4878
   *
4879
   * @param string  $haystack  <p>The string being checked.</p>
4880
   * @param string  $needle    <p>The string to find in haystack.</p>
4881
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
4882 11
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4883
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4884 11
   *
4885 11
   * @return int|false <p>
4886 11
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
4887
   *                   If needle is not found it returns false.
4888 11
   *                   </p>
4889 1
   */
4890 1
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Finds the character position of the first occurrence of $needle in $haystack,
    // or returns false if it is not found (or one of the strings is empty).

    // iconv and mbstring do not support integer $needle
    // INFO: this must be checked before the string-cast below, otherwise it can never match
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (
        $encoding !== 'UTF-8' // INFO: use "mb_"-function (with polyfill) also if we need another encoding
        ||
        self::$support['mbstring'] === true
    ) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // a negative offset counts from the end of the string: translate it into an
      // absolute start position, so that the returned position stays absolute too
      $offset = self::strlen($haystack) + $offset;
      if ($offset < 0) {
        $offset = 0;
      }
    }

    // "self::substr()" can also return false, so normalize the result to a string
    $haystack = (string)self::substr($haystack, $offset);

    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // "$pos" is a byte-offset: count the characters in front of it
    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
4965
4966 16
  /**
4967
   * Finds the last occurrence of a character in a string within another.
4968 16
   *
4969
   * @link http://php.net/manual/en/function.mb-strrchr.php
4970 16
   *
4971 2
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
4972
   * @param string $needle        <p>The string to find in haystack</p>
4973
   * @param bool   $before_needle [optional] <p>
4974 15
   *                              Determines which portion of haystack
4975
   *                              this function returns.
4976
   *                              If set to true, it returns all of haystack
4977
   *                              from the beginning to the last occurrence of needle.
4978
   *                              If set to false, it returns all of haystack
4979
   *                              from the last occurrence of needle to the end,
4980 15
   *                              </p>
4981 2
   * @param string $encoding      [optional] <p>
4982 2
   *                              Character encoding name to use.
4983
   *                              If it is omitted, internal character encoding is used.
4984 15
   *                              </p>
4985
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
4986
   *
4987
   * @return string|false The portion of haystack or false if needle is not found.
4988
   */
4989 View Code Duplication
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default encoding names need to be normalized
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    // invalid characters in front of the needle would lead to wrong results,
    // so they are removed from both strings on request
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // the actual work is done by the "mb_"-function
    $result = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
5004 1
5005 1
  /**
5006 1
   * Reverses characters order in the string.
5007 1
   *
5008
   * @param string $str The input string
5009 1
   *
5010 1
   * @return string The string with characters in the reverse sequence
5011 1
   */
5012 1
  public static function strrev($str)
  {
    $str = (string)$str;

    // an empty string has nothing to reverse
    if ($str === '') {
      return '';
    }

    // split into single characters, flip their order and glue them together again
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5022
5023
  /**
5024
   * Finds the last occurrence of a character in a string within another, case insensitive.
5025
   *
5026
   * @link http://php.net/manual/en/function.mb-strrichr.php
5027
   *
5028
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
5029
   * @param string  $needle        <p>The string to find in haystack.</p>
5030 1
   * @param bool    $before_needle [optional] <p>
5031
   *                               Determines which portion of haystack
5032 1
   *                               this function returns.
5033 1
   *                               If set to true, it returns all of haystack
5034 1
   *                               from the beginning to the last occurrence of needle.
5035
   *                               If set to false, it returns all of haystack
5036 1
   *                               from the last occurrence of needle to the end,
5037
   *                               </p>
5038
   * @param string  $encoding      [optional] <p>
5039
   *                               Character encoding name to use.
5040 1
   *                               If it is omitted, internal character encoding is used.
5041 1
   *                               </p>
5042
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5043 1
   *
5044
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
5045
   */
5046 View Code Duplication
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default encoding names need to be normalized
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    // invalid characters in front of the needle would lead to wrong results,
    // so they are removed from both strings on request
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // the actual (case-insensitive) work is done by the "mb_"-function
    $result = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
5061
5062 47
  /**
5063
   * Find position of last occurrence of a case-insensitive string.
5064 47
   *
5065 9
   * @param string  $haystack  <p>The string to look in.</p>
5066
   * @param string  $needle    <p>The string to look for.</p>
5067
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5068 45
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5069
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5070
   *
5071
   * @return int|false <p>
5072 1
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
5073 1
   *                   not found, it returns false.
5074
   *                   </p>
5075 45
   */
5076 45
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted via "self::chr()"
    // (has to happen before the string-cast below)
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = (string)self::chr($needle);
    }

    // normalize the input types
    $offset = (int)$offset;
    $needle = (string)$needle;
    $haystack = (string)$haystack;

    // nothing to search in, or nothing to search for
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // \mb_strripos && iconv_strripos is not tolerant to invalid characters
    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    $mustClean = ($cleanUtf8 === true || $encoding === true);
    if ($mustClean) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // a boolean $encoding (old versions) is handled like the default encoding
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // INFO: use "mb_"-function (with polyfill) also if we need another encoding
    if ($encoding !== 'UTF-8' || self::$support['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
      if ($graphemePos !== false) {
        return $graphemePos;
      }
    }

    // last resort: lower-case both strings and run the case-sensitive search
    $haystackLower = self::strtolower($haystack, $encoding);
    $needleLower = self::strtolower($needle, $encoding);

    return self::strrpos($haystackLower, $needleLower, $offset, $encoding, $cleanUtf8);
  }
5133
5134
  /**
5135 1
   * Find position of last occurrence of a string in a string.
5136
   *
5137 1
   * @link http://php.net/manual/en/function.mb-strrpos.php
5138 1
   *
5139
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5140 1
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
5141
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5142
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5143
   *                              the end of the string.
5144
   *                              </p>
5145
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5146
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5147
   *
5148
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
5149
   *                   is not found, it returns false.</p>
5150
   */
5151
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted via "self::chr()"
    // (has to happen before the string-cast below)
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = (string)self::chr($needle);
    }

    // normalize the input types
    $offset = (int)$offset;
    $needle = (string)$needle;
    $haystack = (string)$haystack;

    // nothing to search in, or nothing to search for
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    $mustClean = ($cleanUtf8 === true || $encoding === true);
    if ($mustClean) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // a boolean $encoding (old versions) is handled like the default encoding
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // INFO: use "mb_"-function (with polyfill) also if we need another encoding
    if ($encoding !== 'UTF-8' || self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $graphemePos = \grapheme_strrpos($haystack, $needle, $offset);
      if ($graphemePos !== false) {
        return $graphemePos;
      }
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // negative offset: cut the end of the haystack and search from its start
      $haystack = self::substr($haystack, 0, $offset);
      $offset = 0;
    } elseif ($offset > 0) {
      // positive offset: skip the leading characters
      $haystack = self::substr($haystack, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // "$bytePos" is a byte-offset: count the characters in front of it
    $charsBefore = self::strlen(substr($haystack, 0, $bytePos));

    return $offset + $charsBefore;
  }
5222 1
5223 1
  /**
   * Length of the leading part of a string that is made up only of characters from a given mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The characters that are allowed in the leading segment.</p>
   * @param int    $offset [optional] <p>Start position; forwarded to self::substr().</p>
   * @param int    $length [optional] <p>Maximum number of characters to inspect; forwarded to self::substr().</p>
   *
   * @return int <p>Length of the matched leading segment, 0 if nothing matches or an input is empty.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $offset = (int)$offset;
    $length = (int)$length;

    // only cut the input when the caller actually restricted the range
    if ($offset !== 0 || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // anchor the character class (built by self::rxClass()) at the start of the string
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5251 1
5252 1
  /**
   * Return the part of the haystack starting at (or ending before) the first occurrence of the needle.
   *
   * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack before the first occurrence
   *                              of the needle is returned (excluding the needle).
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from both strings before searching.</p>
   *
   * @return string|false <p>A sub-string,<br />or <strong>false</strong> if the needle is not found
   *                      or one of the inputs is empty.</p>
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to search in / nothing to search for
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences up front, so that the search below works on clean input
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: use "mb_"-function (with polyfill) also if we need another encoding
    $useMbstring = ($encoding !== 'UTF-8' || self::$support['mbstring'] === true);
    if ($useMbstring) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: group 1 lazily captures everything in front of the first needle
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    return $before_needle ? $found[1] : self::substr($haystack, self::strlen($found[1]));
  }
5314
5315
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $full      [optional] <p>
   *                          <b>true</b>, additionally apply the "caseFolding_full" data table (default)<br />
   *                          <b>false</b>, use only the limited static array [UTF8::$commonCaseFold]
   *                          </p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string (done after folding).</p>
   *
   * @return string <p>The folded string, passed through self::strtolower() as the last step.</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // search / replace lists for the common folds, kept in static locals after the first call
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$commonCaseFold);
      $commonFoldReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {

      // the full table is loaded lazily, only when full folding is requested
      static $fullFoldTable = null;

      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search list, index 1 the matching replacements
      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5366
5367
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str       <p>The string being lowercased.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The input with all alphabetic characters converted to lowercase.</p>
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is handed to mbstring
      $str = self::clean($str);
    }

    // only non-default charsets need to be normalized
    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtolower($str, $charset);
  }
5399 11
5400 7
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (Unicode NFD) and every run of non-spacing marks is removed.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // \p{Mn} = "Mark, non-spacing"
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5412 1
5413 1
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str       <p>The string being uppercased.</p>
   * @param string $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The input with all alphabetic characters converted to uppercase.</p>
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is handed to mbstring
      $str = self::clean($str);
    }

    // only non-default charsets need to be normalized
    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtoupper($str, $charset);
  }
5444
5445 2
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>
   *                              What to look for: a string (split via UTF8::str_split()) or a list of
   *                              sub-strings.<br />
   *                              If <strong>$to</strong> is omitted, this value is handed to the native
   *                              two-argument strtr() unchanged, i.e. it has to be an array of
   *                              "search => replacement" pairs.
   *                              </p>
   * @param string|string[] $to   [optional] <p>
   *                              The replacements, in the same order as <strong>$from</strong>: a string
   *                              (split via UTF8::str_split()) or a list of sub-strings.
   *                              </p>
   *
   * @return string <p>
   *                A copy of str in which every occurrence of an entry of from is replaced by the
   *                corresponding entry of to. Surplus entries of the longer list are ignored.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // arrays are already lists of search / replace items and must not go through str_split(),
      // which is meant for strings only; array_values() makes both lists 0-indexed for the slicing below
      $from = is_array($from) ? array_values($from) : self::str_split($from);
      $to = is_array($to) ? array_values($to) : self::str_split($to);

      // both lists have to be of equal size for array_combine()
      $pairCount = min(count($from), count($to));

      if ($pairCount === 0) {
        // nothing to replace; also avoids calling array_combine() with empty arrays
        return (string)$str;
      }

      $from = array_combine(
          array_slice($from, 0, $pairCount),
          array_slice($to, 0, $pairCount)
      );
    }

    return strtr($str, $from);
  }
5478
5479
  /**
   * Return the width of a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The value reported by \mb_strwidth().</p>
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default charsets need to be normalized
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding,
      // so invalid byte sequences are removed first
      $str = self::clean($str);
    }

    return \mb_strwidth($str, $charset);
  }
5503
5504
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string $str       <p>The string being checked.</p>
   * @param int    $start     <p>The first position used in str.</p>
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>
   *                      The sub-string specified by the start and length parameters,<br />
   *                      an empty string for empty input,<br />
   *                      or <strong>false</strong> if start is greater than the length of the string.
   *                      </p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed for the range check and as default length
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if (is_bool($encoding) || $encoding === 'UTF-8') {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // INFO: use "mb_"-function (with polyfill) also if we need another encoding
    if ($encoding !== 'UTF-8' || self::$support['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return \iconv_substr($str, $start, $length, $encoding);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_substr($str, $start, $length);
    }

    // fallback via vanilla php:
    // split into an array of characters, take the requested slice and join it to a string again
    return implode('', array_slice(self::split($str), $start, $length));
  }
5586 19
5587 19
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * @param string $main_str           <p>The main string being compared.</p>
   * @param string $str                <p>The secondary string being compared.</p>
   * @param int    $offset             <p>The start position for the comparison. If negative, it starts counting
   *                                   from the end of the string.</p>
   * @param int    $length             [optional] <p>The length of the comparison. By default the rest of
   *                                   main_str (from offset on) is used.</p>
   * @param bool   $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                   insensitive.</p>
   *
   * @return int <p>The result of UTF8::strcmp() or, for case insensitive comparison, UTF8::strcasecmp().</p>
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // UTF8::substr() returns false if the offset lies behind the end of the string;
    // treat that as an empty string so that only strings reach strlen() / strcmp() / strcasecmp()
    $main_str = (string)self::substr($main_str, $offset, $length);

    // compare at most as many characters as were taken from the main string
    $str = (string)self::substr($str, 0, self::strlen($main_str));

    if ($case_insensitivity === true) {
      return self::strcasecmp($main_str, $str);
    }

    return self::strcmp($main_str, $str);
  }
5608
5609 19
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack  <p>The string to search in.</p>
   * @param string   $needle    <p>The substring to search for.</p>
   * @param int      $offset    [optional] <p>The offset where to start counting.</p>
   * @param int|null $length    [optional] <p>
   *                            The maximum length after the specified offset to search for the
   *                            substring. NULL (the default) means "up to the end of the haystack".
   *                            </p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The number of occurrences, or false if $haystack or $needle is empty
   *                   (or, on PHP < 7.1, if $offset + $length is not positive).
   *                   </p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      // Keep NULL as "no limit": casting it to 0 would request a zero-length
      // slice and therefore always count nothing when only an offset is given.
      if ($length !== null) {
        $length = (int)$length;
      }

      // PHP < 7.1 rejects a non-positive "offset + length" in the native function
      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // "self::substr()" may return false (e.g. offset out of range),
      // treat that like an empty haystack instead of passing false on.
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);

      // the needle may be empty after cleaning -> nothing left to search for
      if (!isset($needle[0])) {
        return false;
      }
    }

    // make sure the feature detection has run before "self::$support" is read
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8' // INFO: use "mb_"-function (with polyfill) also if we need another encoding
        ||
        self::$support['mbstring'] === true
    ) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: "preg_match_all()" already returns the number of matches
    return (int)preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches);
  }
5674
5675
  /**
   * Strip a leading $needle from $haystack, ignoring upper / lower case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not start with it
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $prefixLength = self::strlen($needle);

    return self::substr($haystack, $prefixLength);
  }
5702
5703
  /**
   * Strip a trailing $needle from $haystack, ignoring upper / lower case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not end with it
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
5730 1
5731
  /**
   * Strip a leading $needle from $haystack (case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not start with it
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $prefixLength = self::strlen($needle);

    return self::substr($haystack, $prefixLength);
  }
5758
5759
  /**
   * Replace text within a portion of a string (character based, not byte based).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * For a string input the characters from $start up to $start + $length are replaced;
   * if $length is omitted everything up to the end of the string is replaced.
   *
   * For an array input every element is processed separately. Scalar arguments are
   * reused for every element, array arguments are matched by position.
   * NOTE: for array input an omitted $length is handled as 0 for every element
   * (the replacement is inserted at $start), which differs from the string input.
   *
   * @param string|string[] $str         <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $start       <p>Character offset (negative values count from the end).</p>
   * @param int|int[]|null  $length      [optional] <p>Number of characters to replace.</p>
   *
   * @return string|string[] <p>A string for string input, an array for array input.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // nothing to do (and "array_fill()" rejects a count of 0 on old PHP versions)
      if ($num === 0) {
        return array();
      }

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          // non-integer offsets fall back to 0
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            // non-integer lengths fall back to the number of input strings
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // a single string can only take a single replacement: use the first one
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? reset($replacement) : '';
    }

    // split both strings into arrays of UTF-8 characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    $strChars = isset($smatches[0]) ? $smatches[0] : array();
    $replacementChars = isset($rmatches[0]) ? $rmatches[0] : array();

    if ($length === null) {
      // default: replace up to the end; count the characters with the same
      // splitting that is used for the replacement itself
      $length = count($strChars);
    }

    array_splice($strChars, $start, $length, $replacementChars);

    return implode('', $strChars);
  }
5834
5835
  /**
   * Strip a trailing $needle from $haystack (case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not end with it
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
5862
5863
  /**
   * Swap the case of every character: upper case becomes lower case and vice versa.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is matched with the "u" modifier
      $str = self::clean($str);
    }

    // a character that is unchanged by "strtoupper" is lower-cased,
    // every other character is replaced by its upper case form
    $swapChar = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return $upper === $char ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapChar, $str);
  }
5906
5907
  /**
   * Alias of "UTF8::to_ascii()": all arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Passed on as the "unknown character" substitute.</p>
   * @param bool   $strict    [optional] <p>Passed on as the "strict" flag.</p>
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
5922
5923
  /**
   * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
5936
5937
  /**
   * Alias of "UTF8::to_latin1()": the argument is forwarded unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
5950
5951
  /**
   * Alias of "UTF8::to_utf8()": the argument is forwarded unchanged.
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
5964
5965
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first; pure ASCII input is returned right away. Every
   * remaining multi-byte character is decoded to its code point and looked up in
   * a transliteration table that is loaded on demand from "data/xNN.php"
   * (NN = code point >> 8, as hex). Characters without a table entry are
   * replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   *
   * @throws \Exception <p>If $strict is true but "intl" is not available or PHP is older than 5.4.</p>
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache for the transliteration tables, filled by the included data files
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, false, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
        $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      } else {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }
    }

    // split the string into single (multi-byte) characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = isset($ar[0]) ? $ar[0] : array();

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // The lead byte defines the length of the sequence and the bits it contributes.
      // The code point is rebuilt for every character, so a value from a previous
      // iteration can never be reused by accident.
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $seqLength = 2;
        $ord = $ordC0 - 192;
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $seqLength = 3;
        $ord = $ordC0 - 224;
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $seqLength = 4;
        $ord = $ordC0 - 240;
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $seqLength = 5;
        $ord = $ordC0 - 248;
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $seqLength = 6;
        $ord = $ordC0 - 252;
      } else {
        // stray continuation byte (128-191) or invalid lead byte (254-255)
        $c = $unknown;
        continue;
      }

      // truncated sequence
      if (strlen($c) < $seqLength) {
        $c = $unknown;
        continue;
      }

      // every continuation byte adds 6 more bits
      for ($byteIndex = 1; $byteIndex < $seqLength; $byteIndex++) {
        $ord = $ord * 64 + (ord($c[$byteIndex]) - 128);
      }

      // load the table for this block of 256 code points, if needed
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the data file is expected to fill $UTF8_TO_ASCII[$bank] - confirm
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference to the last element
    unset($c);

    return implode('', $chars);
  }
6093
6094
  /**
   * Convert a string (or every element of an array, recursively) into
   * "ISO-8859"-encoding (Latin-1) via "UTF8::utf8_decode()".
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] <p>Same shape as the input; an empty input string gives ''.</p>
   */
  public static function to_iso8859($str)
  {
    if (!is_array($str)) {
      $str = (string)$str;

      return isset($str[0]) ? self::utf8_decode($str) : '';
    }

    // convert each element on its own and keep the keys
    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      /** @noinspection OffsetOperationsInspection */
      $str[$key] = self::to_iso8859($value);
    }

    return $str;
  }
6123
6124
  /**
   * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
6137
6138
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It decodes numeric HTML entities ("&#NNN;") and unicode escape sequences ("\uXXXX").
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      if ($ordC1 >= 0xC0) { // should be converted to UTF8, if it's not UTF8 already

        // number of continuation bytes that a UTF-8 sequence with this lead byte needs
        if ($ordC1 <= 0xDF) {
          $tail = 1; // looks like 2 bytes UTF8
        } elseif ($ordC1 <= 0xEF) {
          $tail = 2; // looks like 3 bytes UTF8
        } elseif ($ordC1 <= 0xF7) {
          $tail = 3; // looks like 4 bytes UTF8
        } else {
          $tail = 0; // doesn't look like UTF8, but should be converted
        }

        // all expected continuation bytes (0x80 - 0xBF) must be present
        $isUtf8 = $tail > 0 && $i + $tail < $max;
        for ($j = 1; $isUtf8 && $j <= $tail; $j++) {
          $isUtf8 = (ord($str[$i + $j]) & 0xC0) === 0x80;
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= substr($str, $i, $tail + 1);
          $i += $tail;
        } else { // not valid UTF8 - handle the byte as ISO-8859-1 and convert it
          $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
        }

      } elseif (($ordC1 & 0xC0) === 0x80) { // needs conversion

        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          // integer shift instead of a division: no float is passed to "chr()"
          $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode numeric HTML entities
    $buf = preg_replace_callback(
        '/&#\d{2,6};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6267
6268
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>
   *                      Optional characters to be stripped. If omitted (or empty), unicode
   *                      separators (\pZ) and control / other characters (\pC) are stripped.
   *                      </p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "No chars given" is checked explicitly: a plain truthiness test would also
    // treat "0" as "not given" and strip whitespace instead of zeros.
    $useDefaultChars = $chars === INF
                       || $chars === null
                       || $chars === false
                       || $chars === ''
                       || $chars === array();

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6296
6297
  /**
   * Makes string's first char uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // "self::substr()" may return false instead of a string,
    // so both parts are normalized to strings before they are used
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $rest = (string)self::substr($str, 1, null, $encoding, $cleanUtf8);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $rest;
  }
6310
6311
  /**
   * Alias of "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Passed on to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 [optional] <p>Passed on to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6326
6327
  /**
   * Uppercase for all words in the string.
   *
   * The string is split into words (runs of letters plus the chars from $charlist,
   * optionally joined by dashes or apostrophes) and the text between them; every
   * word that is not listed in $exceptions gets its first char upper-cased.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // only a really empty string is "empty": "0" is valid input
    if (!isset($str[0])) {
      return '';
    }

    $charlist = self::rxClass($charlist, '\pL');
    $words = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // the split failed (e.g. invalid UTF-8): return the input unchanged
    if ($words === false) {
      return $str;
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach ($words as $word) {

      // skip the empty pieces from the split, but keep pieces like "0"
      if ($word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
6377
6378
  /**
   * Url-decode a string, decode its html entities and repair broken UTF-8,
   * by default repeated until the string does not change anymore.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html entities
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $decodedOnce = urldecode($str);
      $str = preg_replace($unicodeEscape, '&#x\\1;', $decodedOnce);
    }

    // "ENT_HTML5" is only referenced on PHP >= 5.4
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // always decode once, then repeat only if requested and the string still changes
    while (true) {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

      if ($multi_decode !== true || $before === $str) {
        break;
      }
    }

    return (string)$str;
  }
6428
6429
  /**
6430
   * Multi decode html entity & fix urlencoded-win1252-chars.
6431
   *
6432
   * e.g:
6433
   * 'test+test'                     => 'test+test'
6434
   * 'D&#252;sseldorf'               => 'Düsseldorf'
6435
   * 'D%FCsseldorf'                  => 'Düsseldorf'
6436
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
6437
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
6438
   * 'Düsseldorf'                   => 'Düsseldorf'
6439
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
6440
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
6441
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
6442
   *
6443
   * @param string $str          <p>The input string.</p>
6444
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
6445
   *
6446
   * @return string
6447
   */
6448 View Code Duplication
  public static function rawurldecode($str, $multi_decode = true)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6449
  {
6450
    $str = (string)$str;
6451
6452
    if (!isset($str[0])) {
6453
      return '';
6454
    }
6455
6456
    $pattern = '/%u([0-9a-f]{3,4})/i';
6457
    if (preg_match($pattern, $str)) {
6458
      $str = preg_replace($pattern, '&#x\\1;', rawurldecode($str));
6459
    }
6460
6461
    $flags = Bootup::is_php('5.4') ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;
6462
6463
    do {
6464
      $str_compare = $str;
6465
6466
      $str = self::fix_simple_utf8(
6467
          rawurldecode(
6468
              self::html_entity_decode(
6469
                  self::to_utf8($str),
0 ignored issues
show
Bug introduced by
It seems like self::to_utf8($str) targeting voku\helper\UTF8::to_utf8() can also be of type array; however, voku\helper\UTF8::html_entity_decode() does only seem to accept string, maybe add an additional type check?

This check looks at variables that are passed out again to other methods.

If the outgoing method call has stricter type requirements than the method itself, an issue is raised.

An additional type check may prevent trouble.

Loading history...
6470
                  $flags
6471
              )
6472
          )
6473
      );
6474
6475
    } while ($multi_decode === true && $str_compare !== $str);
6476
6477
    return (string)$str;
6478
  }
6479
6480
  /**
6481
   * Return an array with "urlencoded"-win1252 -> UTF-8
6482
   *
6483
   * @deprecated use the "UTF8::urldecode()" function to decode a string
6484
   *
6485
   * @return array
6486
   */
6487
  public static function urldecode_fix_win1252_chars()
  {
    // Lookup table: percent-encoded Windows-1252 byte ("%20" .. "%FF") => UTF-8 character.
    //
    // "%80" is the euro sign in Windows-1252; it was previously mapped to a
    // backtick, which is already (correctly) the value of "%60".
    //
    // NOTE(review): "%7F", "%81", "%8D", "%8F", "%90", "%9D", "%A0" and "%AD" are
    // kept as empty strings, exactly as they appear in the reviewed source.
    // 0x81/0x8D/0x8F/0x90/0x9D are unassigned in Windows-1252, but 0x7F (DEL),
    // 0xA0 (no-break space) and 0xAD (soft hyphen) are assigned - confirm that
    // dropping them is intended and not an invisible character lost in transit.
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6718
6719
  /**
6720
   * Decodes an UTF-8 string to ISO-8859-1.
6721
   *
6722
   * @param string $str <p>The input string.</p>
6723
   *
6724
   * @return string
6725
   */
6726
  public static function utf8_decode($str)
  {
    // Search/replace lists derived from self::$utf8ToWin1252; filled on the
    // first non-empty call and reused afterwards.
    static $search = null;
    static $replace = null;

    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    // Normalise to UTF-8 before any mapping is applied.
    $input = (string)self::to_utf8($input);

    if (null === $search) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    // Apply the table first, then hand the result to the Xml helper's decoder.
    $mapped = str_replace($search, $replace, $input);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
6748
6749
  /**
6750
   * Encodes an ISO-8859-1 string to UTF-8.
6751
   *
6752
   * @param string $str <p>The input string.</p>
6753
   *
6754
   * @return string
6755
   */
6756
  public static function utf8_encode($str)
  {
    // Search/replace lists derived from self::$cp1252ToUtf8; filled lazily
    // the first time a remap is actually needed.
    static $search = null;
    static $replace = null;

    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    // Native ISO-8859-1 -> UTF-8 conversion.
    $encoded = \utf8_encode($input);

    // Without a "\xC2" byte the table is skipped entirely.
    // NOTE(review): presumably every key of self::$cp1252ToUtf8 starts with "\xC2" -
    // the table is not visible here, confirm.
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if (null === $search) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6782
6783
  /**
6784
   * fix -> utf8-win1252 chars
6785
   *
6786
   * @param string $str <p>The input string.</p>
6787
   *
6788
   * @return string
6789
   *
6790
   * @deprecated use "UTF8::fix_simple_utf8()"
6791
   */
6792
  public static function utf8_fix_win1252_chars($str)
  {
    // Deprecated alias: the work is done entirely by fix_simple_utf8().
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6796
6797
  /**
6798
   * Returns an array with all utf8 whitespace characters.
6799
   *
6800
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
6801
   *
6802
   * @author: Derek E. [email protected]
6803
   *
6804
   * @return array <p>
6805
   *               An array with all known whitespace characters as values and the type of whitespace as keys
6806
   *               as defined in above URL.
6807
   *               </p>
6808
   */
6809
  public static function whitespace_table()
  {
    // Plain accessor for the class-level whitespace table.
    $table = self::$whitespaceTable;

    return $table;
  }
6813
6814
  /**
6815
   * Limit the number of words in a string.
6816
   *
6817
   * @param string $str      <p>The input string.</p>
6818
   * @param int    $words    <p>The limit of words as integer.</p>
6819
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
6820
   *
6821
   * @return string
6822
   */
6823
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $text = (string)$str;
    if ($text === '') {
      return '';
    }

    $limit = (int)$words;
    if ($limit < 1) {
      return '';
    }

    // Optional leading whitespace, then between 1 and $limit runs of
    // "non-whitespace followed by optional whitespace", anchored at the start.
    $regex = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($regex, $text, $found);

    // Something was cut off only if there is a match and it is shorter than the input.
    $wasShortened = isset($found[0]) && self::strlen($text) !== self::strlen($found[0]);
    if (!$wasShortened) {
      return $text;
    }

    // Drop the trailing whitespace of the kept part before appending the add-on.
    return self::rtrim($found[0]) . $strAddOn;
  }
6849
6850
  /**
6851
   * Wraps a string to a given number of characters
6852
   *
6853
   * @link  http://php.net/manual/en/function.wordwrap.php
6854
   *
6855
   * @param string $str   <p>The input string.</p>
6856
   * @param int    $width [optional] <p>The column width.</p>
6857
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
6858
   * @param bool   $cut   [optional] <p>
6859
   *                      If the cut is set to true, the string is
6860
   *                      always wrapped at or before the specified width. So if you have
6861
   *                      a word that is larger than the given width, it is broken apart.
6862
   *                      </p>
6863
   *
6864
   * @return string <p>The given string wrapped at the specified column.</p>
6865
   */
6866
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ($str === '' || $break === '') {
      return '';
    }

    // $mask is a single-byte stand-in for the text, so that the native
    // wordwrap() can be used: ' ' for a space, '?' for any other element
    // returned by self::split() (presumably one character each), and '#'
    // for every pre-existing $break. $glyphs holds the real pieces in the same order.
    $mask = '';
    $glyphs = array();

    $segments = explode($break, $str);
    $segmentCount = count($segments);

    for ($index = 0; $index < $segmentCount; ++$index) {

      // Every segment after the first was preceded by a $break in the input.
      if ($index > 0) {
        $glyphs[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$index];
      unset($segments[$index]);

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $mask .= (' ' === $glyph) ? ' ' : '?';
      }
    }

    // Let the native function decide where the breaks go; they are marked with '#'.
    $mask = wordwrap($mask, $width, '#', $cut);

    $result = '';
    $next = 0;      // index of the next unconsumed entry in $glyphs
    $cursor = -1;   // position in $mask that has been processed so far
    $hashPos = -1;  // position of the '#' currently being handled

    while (false !== $hashPos = self::strpos($mask, '#', $hashPos + 1)) {

      // Copy everything between the previous '#' and this one.
      for (++$cursor; $cursor < $hashPos; ++$cursor) {
        $result .= $glyphs[$next];
        unset($glyphs[$next]);
        ++$next;
      }

      // The piece sitting at the break position is dropped when it is the
      // original $break or a space; it is replaced by $break below.
      if ($break === $glyphs[$next] || ' ' === $glyphs[$next]) {
        unset($glyphs[$next]);
        ++$next;
      }

      $result .= $break;
    }

    // Whatever was not consumed follows the last break.
    return $result . implode('', $glyphs);
  }
6917
6918
  /**
6919
   * Returns an array of Unicode White Space characters.
6920
   *
6921
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
6922
   */
6923
  public static function ws()
  {
    // Plain accessor for the class-level whitespace list.
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6927
6928
}
6929