Completed
Push — master ( 34603f...754113 )
by Lars
03:59
created

UTF8   D

Complexity

Total Complexity 660

Size/Duplication

Total Lines 6156
Duplicated Lines 5.15 %

Coupling/Cohesion

Components 3
Dependencies 3

Test Coverage

Coverage 75.31%

Importance

Changes 62
Bugs 18 Features 14
Metric Value
wmc 660
c 62
b 18
f 14
lcom 3
cbo 3
dl 317
loc 6156
ccs 1165
cts 1547
cp 0.7531
rs 4.4102

145 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A access() 0 6 1
A add_bom_to_string() 0 8 2
A bom() 0 4 1
A callback() 0 4 1
B case_table() 0 1001 1
A checkForSupport() 0 10 2
A chr() 0 13 3
A chr_map() 0 6 1
A chr_size_list() 0 8 2
B chr_to_decimal() 0 32 6
A chr_to_hex() 0 4 1
A chunk_split() 0 4 1
B clean() 0 36 4
A cleanup() 0 23 2
B codepoints() 0 26 3
A count_chars() 0 8 1
A decimal_to_chr() 0 10 1
A encode() 0 14 3
C file_get_contents() 0 47 8
A file_has_bom() 0 4 1
C filter() 34 43 13
A filter_input() 10 10 2
A filter_input_array() 10 10 2
A filter_var() 10 10 2
A filter_var_array() 10 10 2
A fits_inside() 0 4 1
A fix_simple_utf8() 0 18 3
A fix_utf8() 0 20 4
D getCharDirection() 0 92 115
A getData() 0 9 2
B hash() 0 40 5
A hex_to_int() 0 8 2
A html_encode() 0 12 1
B html_entity_decode() 0 36 6
A entityCallback() 0 12 2
A htmlentities() 0 4 1
A htmlspecialchars() 0 4 1
A iconv_loaded() 0 4 2
A int_to_hex() 0 12 3
A intl_loaded() 0 4 2
A isAscii() 0 4 1
A isBase64() 0 4 1
A isBom() 0 4 1
A isJson() 0 18 4
A isUtf8() 0 4 1
A is_ascii() 0 4 1
A is_base64() 0 14 3
B is_binary() 0 17 5
A is_binary_file() 0 12 2
A is_bom() 0 4 1
C is_utf16() 47 47 14
C is_utf32() 47 47 14
D is_utf8() 21 124 22
A json_decode() 0 12 2
A json_encode() 0 12 2
A lcfirst() 0 4 1
A ltrim() 12 12 3
A max() 8 8 2
A max_chr_width() 0 9 2
A mbstring_loaded() 0 10 2
A min() 8 8 2
B normalizeEncoding() 0 28 2
A normalize_msword() 0 12 2
A normalize_whitespace() 0 18 3
A number_format() 0 20 4
C ord() 0 23 9
A parse_str() 0 9 1
A pcre_utf8_support() 0 5 1
D range() 14 38 9
B removeBOM() 15 40 6
A remove_duplicates() 0 14 4
A remove_invisible_characters() 0 20 3
A replace_diamond_question_mark() 0 14 1
A rtrim() 12 12 3
C rxClass() 0 36 7
A showSupport() 0 6 2
A single_chr_html_encode() 0 8 2
C split() 12 69 22
C str_detect_encoding() 0 71 13
A str_ireplace() 0 18 3
B str_limit_after_word() 0 29 5
C str_pad() 9 35 7
A str_repeat() 0 6 1
A str_replace() 0 4 1
A str_shuffle() 0 8 1
A str_sort() 0 16 3
C str_split() 0 41 7
A str_to_binary() 0 19 3
F str_transliterate() 6 92 21
B str_word_count() 0 25 5
A strcasecmp() 0 4 1
A strcmp() 0 7 2
B strcspn() 0 19 5
A string() 0 12 1
A string_has_bom() 0 4 1
A strip_tags() 0 7 1
B stripos() 0 24 6
A stristr() 0 11 2
B strlen() 0 22 6
A strnatcasecmp() 0 4 1
A strnatcmp() 0 4 2
A strncasecmp() 0 4 1
A strncmp() 0 4 1
A strpbrk() 0 8 2
C strpos() 6 54 13
A strrchr() 0 6 1
A strrev() 0 4 1
A strrichr() 0 6 1
A strripos() 0 4 1
C strrpos() 6 51 12
A strspn() 0 8 4
A strstr() 0 6 1
B strtocasefold() 0 27 4
A strtolower() 0 13 2
A strtonatfold() 0 4 1
B strtoupper() 0 31 4
A strtr() 0 19 4
A strwidth() 0 7 1
C substr() 0 46 8
A substr_compare() 0 7 2
A substr_count() 0 16 4
C substr_replace() 0 66 14
B swapCase() 0 26 3
A toAscii() 0 4 1
A toLatin1() 0 4 1
A toUTF8() 0 4 1
C to_ascii() 0 71 15
A to_iso8859() 0 4 1
A to_latin1() 0 4 1
D to_utf8() 20 106 26
A to_win1252() 0 16 4
A trim() 0 15 4
A ucfirst() 0 4 1
A ucword() 0 4 1
C ucwords() 0 33 7
A urldecode() 0 23 3
B urldecode_fix_win1252_chars() 0 231 1
A utf8_decode() 0 23 3
A utf8_encode() 0 19 3
A utf8_fix_win1252_chars() 0 4 1
A whitespace_table() 0 4 1
A words_limit() 0 20 4
C wordwrap() 0 56 13
A ws() 0 4 1

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems, and corresponding solutions are:

Complex Class

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This often can reduce the size of classes significantly.

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Intl\Normalizer\Normalizer;
7
use Symfony\Polyfill\Xml\Xml;
8
9
/**
10
 * UTF8-Helper-Class
11
 *
12
 * @package voku\helper
13
 */
14
class UTF8
15
{
16
  /**
17
   * @var array
18
   */
19
  protected static $win1252ToUtf8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
  );
48
49
  /**
50
   * @var array
51
   */
52
  protected static $cp1252ToUtf8 = array(
53
      '€' => '€',
54
      '‚' => '‚',
55
      'ƒ' => 'ƒ',
56
      '„' => '„',
57
      '…' => '…',
58
      '†' => '†',
59
      '‡' => '‡',
60
      'ˆ' => 'ˆ',
61
      '‰' => '‰',
62
      'Š' => 'Š',
63
      '‹' => '‹',
64
      'Œ' => 'Œ',
65
      'Ž' => 'Ž',
66
      '‘' => '‘',
67
      '’' => '’',
68
      '“' => '“',
69
      '”' => '”',
70
      '•' => '•',
71
      '–' => '–',
72
      '—' => '—',
73
      '˜' => '˜',
74
      '™' => '™',
75
      'š' => 'š',
76
      '›' => '›',
77
      'œ' => 'œ',
78
      'ž' => 'ž',
79
      'Ÿ' => 'Ÿ',
80
  );
81
82
  /**
83
   * Numeric code point => UTF-8 Character
84
   *
85
   * @var array
86
   */
87
  protected static $whitespace = array(
88
      0     => "\x0",
89
      //NUL Byte
90
      9     => "\x9",
91
      //Tab
92
      10    => "\xa",
93
      //New Line
94
      11    => "\xb",
95
      //Vertical Tab
96
      13    => "\xd",
97
      //Carriage Return
98
      32    => "\x20",
99
      //Ordinary Space
100
      160   => "\xc2\xa0",
101
      //NO-BREAK SPACE
102
      5760  => "\xe1\x9a\x80",
103
      //OGHAM SPACE MARK
104
      6158  => "\xe1\xa0\x8e",
105
      //MONGOLIAN VOWEL SEPARATOR
106
      8192  => "\xe2\x80\x80",
107
      //EN QUAD
108
      8193  => "\xe2\x80\x81",
109
      //EM QUAD
110
      8194  => "\xe2\x80\x82",
111
      //EN SPACE
112
      8195  => "\xe2\x80\x83",
113
      //EM SPACE
114
      8196  => "\xe2\x80\x84",
115
      //THREE-PER-EM SPACE
116
      8197  => "\xe2\x80\x85",
117
      //FOUR-PER-EM SPACE
118
      8198  => "\xe2\x80\x86",
119
      //SIX-PER-EM SPACE
120
      8199  => "\xe2\x80\x87",
121
      //FIGURE SPACE
122
      8200  => "\xe2\x80\x88",
123
      //PUNCTUATION SPACE
124
      8201  => "\xe2\x80\x89",
125
      //THIN SPACE
126
      8202  => "\xe2\x80\x8a",
127
      //HAIR SPACE
128
      8232  => "\xe2\x80\xa8",
129
      //LINE SEPARATOR
130
      8233  => "\xe2\x80\xa9",
131
      //PARAGRAPH SEPARATOR
132
      8239  => "\xe2\x80\xaf",
133
      //NARROW NO-BREAK SPACE
134
      8287  => "\xe2\x81\x9f",
135
      //MEDIUM MATHEMATICAL SPACE
136
      12288 => "\xe3\x80\x80"
137
      //IDEOGRAPHIC SPACE
138
  );
139
140
  /**
141
   * @var array
142
   */
143
  protected static $whitespaceTable = array(
144
      'SPACE'                     => "\x20",
145
      'NO-BREAK SPACE'            => "\xc2\xa0",
146
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
147
      'EN QUAD'                   => "\xe2\x80\x80",
148
      'EM QUAD'                   => "\xe2\x80\x81",
149
      'EN SPACE'                  => "\xe2\x80\x82",
150
      'EM SPACE'                  => "\xe2\x80\x83",
151
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
152
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
153
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
154
      'FIGURE SPACE'              => "\xe2\x80\x87",
155
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
156
      'THIN SPACE'                => "\xe2\x80\x89",
157
      'HAIR SPACE'                => "\xe2\x80\x8a",
158
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
159
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
160
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
161
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  protected static $commonCaseFold = array(
168
      'ſ'            => 's',
169
      "\xCD\x85"     => 'ι',
170
      'ς'            => 'σ',
171
      "\xCF\x90"     => 'β',
172
      "\xCF\x91"     => 'θ',
173
      "\xCF\x95"     => 'φ',
174
      "\xCF\x96"     => 'π',
175
      "\xCF\xB0"     => 'κ',
176
      "\xCF\xB1"     => 'ρ',
177
      "\xCF\xB5"     => 'ε',
178
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
179
      "\xE1\xBE\xBE" => 'ι',
180
  );
181
182
  /**
183
   * @var array
184
   */
185
  protected static $brokenUtf8ToUtf8 = array(
186
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
187
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
188
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
189
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
190
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
191
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
192
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
193
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
194
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
195
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
196
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
197
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
198
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
199
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
200
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
201
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
202
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
203
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
204
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
205
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
206
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
207
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
208
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
209
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
210
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
211
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
212
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
213
      'ü'       => 'ü',
214
      'ä'       => 'ä',
215
      'ö'       => 'ö',
216
      'Ö'       => 'Ö',
217
      'ß'       => 'ß',
218
      'Ã '       => 'à',
219
      'á'       => 'á',
220
      'â'       => 'â',
221
      'ã'       => 'ã',
222
      'ù'       => 'ù',
223
      'ú'       => 'ú',
224
      'û'       => 'û',
225
      'Ù'       => 'Ù',
226
      'Ú'       => 'Ú',
227
      'Û'       => 'Û',
228
      'Ü'       => 'Ü',
229
      'ò'       => 'ò',
230
      'ó'       => 'ó',
231
      'ô'       => 'ô',
232
      'è'       => 'è',
233
      'é'       => 'é',
234
      'ê'       => 'ê',
235
      'ë'       => 'ë',
236
      'À'       => 'À',
237
      'Á'       => 'Á',
238
      'Â'       => 'Â',
239
      'Ã'       => 'Ã',
240
      'Ä'       => 'Ä',
241
      'Ã…'       => 'Å',
242
      'Ç'       => 'Ç',
243
      'È'       => 'È',
244
      'É'       => 'É',
245
      'Ê'       => 'Ê',
246
      'Ë'       => 'Ë',
247
      'ÃŒ'       => 'Ì',
248
      'Í'       => 'Í',
249
      'ÃŽ'       => 'Î',
250
      'Ï'       => 'Ï',
251
      'Ñ'       => 'Ñ',
252
      'Ã’'       => 'Ò',
253
      'Ó'       => 'Ó',
254
      'Ô'       => 'Ô',
255
      'Õ'       => 'Õ',
256
      'Ø'       => 'Ø',
257
      'Ã¥'       => 'å',
258
      'æ'       => 'æ',
259
      'ç'       => 'ç',
260
      'ì'       => 'ì',
261
      'í'       => 'í',
262
      'î'       => 'î',
263
      'ï'       => 'ï',
264
      'ð'       => 'ð',
265
      'ñ'       => 'ñ',
266
      'õ'       => 'õ',
267
      'ø'       => 'ø',
268
      'ý'       => 'ý',
269
      'ÿ'       => 'ÿ',
270
      '€'      => '€',
271
  );
272
273
  /**
274
   * @var array
275
   */
276
  protected static $utf8ToWin1252 = array(
277
      "\xe2\x82\xac" => "\x80", // EURO SIGN
278
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
279
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
280
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
281
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
282
      "\xe2\x80\xa0" => "\x86", // DAGGER
283
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
284
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
285
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
286
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
287
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
288
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
289
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
290
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
291
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
292
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
293
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
294
      "\xe2\x80\xa2" => "\x95", // BULLET
295
      "\xe2\x80\x93" => "\x96", // EN DASH
296
      "\xe2\x80\x94" => "\x97", // EM DASH
297
      "\xcb\x9c"     => "\x98", // SMALL TILDE
298
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
299
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
300
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
301
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
302
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
303
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
304
  );
305
306
  /**
307
   * @var array
308
   */
309
  protected static $utf8MSWord = array(
310
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
311
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
312
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
313
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
314
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
315
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
316
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
317
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
318
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
319
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
320
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
321
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
322
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
323
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
324
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
325
  );
326
327
  /**
328
   * @var array
329
   */
330
  private static $support = array();
331
332
  /**
   * Creates a UTF8 helper instance.
   *
   * The constructor holds no state of its own; its only action is to invoke
   * the class-level checkForSupport() routine.
   */
  public function __construct()
  {
    // Delegate entirely to the static support check.
    self::checkForSupport();
  }
339
340
  /**
   * Fetches one character of a string by its position.
   *
   * Thin wrapper that forwards to self::substr() with a fixed length of 1,
   * offering $str[1]-like access to a single character.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of the character to return.
   *
   * @return   string Single multi-byte character, as produced by self::substr().
   */
  public static function access($str, $pos)
  {
    // Exactly one character is requested, starting at $pos.
    $singleChar = self::substr($str, $pos, 1);

    return $singleChar;
  }
354 1
355 1
  /**
   * Ensures the given string starts with the Byte Order Mark.
   *
   * The first three bytes of the input are handed to self::is_bom(); when that
   * check succeeds the input is returned untouched, otherwise the value of
   * self::bom() is prepended.
   *
   * @param    string $str The input string
   *
   * @return   string The input string, prefixed with the BOM if it was missing
   */
  public static function add_bom_to_string($str)
  {
    // Only the leading three bytes are inspected (byte-wise substr on purpose).
    $leadingBytes = substr($str, 0, 3);

    if (self::is_bom($leadingBytes)) {
      // Already marked: hand the input back as-is.
      return $str;
    }

    return self::bom() . $str;
  }
372
373
  /**
   * Provides the Byte Order Mark as a three-byte string (0xEF 0xBB 0xBF).
   *
   * @return   string Byte Order Mark
   */
  public static function bom()
  {
    // The three BOM bytes, written as hex escapes.
    $byteOrderMark = "\xEF\xBB\xBF";

    return $byteOrderMark;
  }
382
383
  /**
   * Alias of UTF8::chr_map().
   *
   * Both arguments are forwarded unchanged, in the same order, and the result
   * of chr_map() is returned as-is.
   *
   * @param $callback Passed through as the first argument of chr_map().
   * @param $str      Passed through as the second argument of chr_map().
   *
   * @return array The value returned by chr_map().
   */
  public static function callback($callback, $str)
  {
    // Pure delegation; no processing happens here.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
395
396
  /**
397
   * Returns an array of all lower and upper case UTF-8 encoded characters.
398
   *
399 30
   * @return   array An array with lower case chars as keys and upper chars as values.
400
   */
401
  protected static function case_table()
402 30
  {
403
    static $case = array(
404
405
      // lower => upper
406
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
407
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
408
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
409
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
410
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
411
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
412
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
413 6
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
414
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
415 6
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
416
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
417
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
418
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
419
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
420
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
421
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
422
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
423
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
424
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
425
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
426 7
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
427
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
428 7
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
429
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
430 7
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
431
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
432 7
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
433 2
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
434
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
435
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
436 6
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
437
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
438 6
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
439 3
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
440
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
441 3
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
442
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
443 3
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
444
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
445
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
446 3
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
447
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
448 3
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
449 3
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
450
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
451
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
452 3
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
453 3
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
454 3
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
455
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
456
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
457
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
458
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
459
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
460
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
461
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
462
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
463
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
464
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
465
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
466 3
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
467
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
468 1
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
469 1
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
470 1
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
471
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
472 1
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
473 1
      "\xea\x9e\x87"     => "\xea\x9e\x86",
474 1
      "\xea\x9e\x85"     => "\xea\x9e\x84",
475 1
      "\xea\x9e\x83"     => "\xea\x9e\x82",
476
      "\xea\x9e\x81"     => "\xea\x9e\x80",
477 1
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
478
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
479
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
480 1
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
481
      "\xea\x9d\xad"     => "\xea\x9d\xac",
482
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
483 1
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
484
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
485 3
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
486 1
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
487 1
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
488
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
489 3
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
490 3
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
491
      "\xea\x9d\x99"     => "\xea\x9d\x98",
492 3
      "\xea\x9d\x97"     => "\xea\x9d\x96",
493 3
      "\xea\x9d\x95"     => "\xea\x9d\x94",
494
      "\xea\x9d\x93"     => "\xea\x9d\x92",
495 6
      "\xea\x9d\x91"     => "\xea\x9d\x90",
496
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
497
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
498
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
499
      "\xea\x9d\x89"     => "\xea\x9d\x88",
500
      "\xea\x9d\x87"     => "\xea\x9d\x86",
501
      "\xea\x9d\x85"     => "\xea\x9d\x84",
502
      "\xea\x9d\x83"     => "\xea\x9d\x82",
503
      "\xea\x9d\x81"     => "\xea\x9d\x80",
504
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
505
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
506
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
507
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
508
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
509 24
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
510
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
511
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
512
      "\xea\x9c\xad"     => "\xea\x9c\xac",
513
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
514
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
515
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
516
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
517
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
518
      "\xea\x9a\x97"     => "\xea\x9a\x96",
519
      "\xea\x9a\x95"     => "\xea\x9a\x94",
520
      "\xea\x9a\x93"     => "\xea\x9a\x92",
521
      "\xea\x9a\x91"     => "\xea\x9a\x90",
522
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
523
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
524
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
525 24
      "\xea\x9a\x89"     => "\xea\x9a\x88",
526 24
      "\xea\x9a\x87"     => "\xea\x9a\x86",
527
      "\xea\x9a\x85"     => "\xea\x9a\x84",
528 24
      "\xea\x9a\x83"     => "\xea\x9a\x82",
529 24
      "\xea\x9a\x81"     => "\xea\x9a\x80",
530
      "\xea\x99\xad"     => "\xea\x99\xac",
531 24
      "\xea\x99\xab"     => "\xea\x99\xaa",
532 7
      "\xea\x99\xa9"     => "\xea\x99\xa8",
533 7
      "\xea\x99\xa7"     => "\xea\x99\xa6",
534
      "\xea\x99\xa5"     => "\xea\x99\xa4",
535 24
      "\xea\x99\xa3"     => "\xea\x99\xa2",
536 1
      "\xea\x99\x9f"     => "\xea\x99\x9e",
537 1
      "\xea\x99\x9d"     => "\xea\x99\x9c",
538
      "\xea\x99\x9b"     => "\xea\x99\x9a",
539 24
      "\xea\x99\x99"     => "\xea\x99\x98",
540 6
      "\xea\x99\x97"     => "\xea\x99\x96",
541 6
      "\xea\x99\x95"     => "\xea\x99\x94",
542
      "\xea\x99\x93"     => "\xea\x99\x92",
543 24
      "\xea\x99\x91"     => "\xea\x99\x90",
544
      "\xea\x99\x8f"     => "\xea\x99\x8e",
545
      "\xea\x99\x8d"     => "\xea\x99\x8c",
546
      "\xea\x99\x8b"     => "\xea\x99\x8a",
547
      "\xea\x99\x89"     => "\xea\x99\x88",
548
      "\xea\x99\x87"     => "\xea\x99\x86",
549
      "\xea\x99\x85"     => "\xea\x99\x84",
550
      "\xea\x99\x83"     => "\xea\x99\x82",
551
      "\xea\x99\x81"     => "\xea\x99\x80",
552
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
553
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
554 25
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
555
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
556 25
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
557
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
558 25
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
559 25
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
560 25
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
561
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
562 25
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
563 25
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
564 25
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
565
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
566 25
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
567
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
568
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
569
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
570
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
571
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
572
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
573
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
574
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
575
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
576
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
577
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
578
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
579
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
580
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
581
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
582 25
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
583
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
584
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
585 25
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
586
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
587
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
588
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
589 25
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
590 25
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
591 25
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
592 25
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
593
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
594 25
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
595
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
596
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
597 25
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
598 25
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
599
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
600 25
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
601
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
602
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
603
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
604
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
605
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
606
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
607
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
608
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
609
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
610
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
611 8
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
612
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
613 8
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
614
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
615 8
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
616
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
617 2
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
618
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
619 2
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
620
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
621 1
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
622 1
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
623
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
624 2
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
625 2
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
626
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
627 8
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
628
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
629
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
630
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
631
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
632
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
633
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
634
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
635
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
636
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
637
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
638
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
639 1
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
640
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
641 1
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
642
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
643
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
644
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
645
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
646
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
647
      "\xe2\xb1\xa6"     => "\xc8\xbe",
648
      "\xe2\xb1\xa5"     => "\xc8\xba",
649
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
650
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
651 2
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
652
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
653 2
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
654 2
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
655
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
656 2
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
657 1
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
658 1
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
659 1
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
660
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
661 2
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
662
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
663
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
664
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
665
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
666
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
667
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
668
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
669
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
670
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
671 7
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
672
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
673
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
674
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
675 7
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
676 1
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
677 1
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
678
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
679
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
680 7
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
681 1
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
682 1
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
683
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
684
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
685 7
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
686 2
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
687 2
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
688
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
689
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
690 7
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
691 1
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
692 1
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
693
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
694
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
695 7
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
696 1
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
697 1
      "\xe2\x86\x84"     => "\xe2\x86\x83",
698
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
699 7
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
700
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
701
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
702
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
703
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
704
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
705
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
706
      "\xe1\xbe\xbe"     => "\xce\x99",
707
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
708
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
709 2
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
710
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
711 2
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
712 2
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
713 2
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
714
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
715
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
716
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
717
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
718
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
719
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
720
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
721
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
722
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
723
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
724
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
725
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
726
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
727
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
728
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
729
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
730
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
731
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
732
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
733
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
734
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
735
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
736 8
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
737
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
738 8
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
739
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
740 8
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
741
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
742 8
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
743 2
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
744
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
745
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
746 7
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
747
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
748 7
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
749 7
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
750 7
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
751
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
752 7
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
753
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
754 7
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
755 6
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
756
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
757
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
758 4
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
759
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
760
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
761 4
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
762 4
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
763 4
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
764
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
765 4
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
766 3
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
767
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
768 3
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
769 3
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
770 3
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
771
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
772 3
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
773
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
774
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
775
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
776
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
777
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
778
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
779
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
780
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
781
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
782
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
783
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
784
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
785
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
786
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
787
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
788
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
789
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
790
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
791
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
792
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
793
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
794
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
795 3
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
796
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
797 4
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
798
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
799
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
800
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
801
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
802 4
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
803
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
804
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
805
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
806
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
807 4
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
808 4
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
809 2
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
810 2
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
811
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
812 2
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
813 2
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
814
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
815
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
816 2
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
817
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
818 4
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
819 4
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
820 4
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
821 4
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
822
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
823
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
824 7
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
825
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
826 7
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
827
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
828
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
829
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
830
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
831
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
832
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
833
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
834
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
835
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
836
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
837
      "\xe1\xba\xad"     => "\xe1\xba\xac",
838
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
839
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
840
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
841
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
842
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
843
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
844
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
845
      "\xe1\xba\x95"     => "\xe1\xba\x94",
846
      "\xe1\xba\x93"     => "\xe1\xba\x92",
847
      "\xe1\xba\x91"     => "\xe1\xba\x90",
848
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
849
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
850
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
851
      "\xe1\xba\x89"     => "\xe1\xba\x88",
852
      "\xe1\xba\x87"     => "\xe1\xba\x86",
853
      "\xe1\xba\x85"     => "\xe1\xba\x84",
854
      "\xe1\xba\x83"     => "\xe1\xba\x82",
855
      "\xe1\xba\x81"     => "\xe1\xba\x80",
856
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
857
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
858
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
859
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
860
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
861
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
862
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
863
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
864
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
865
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
866
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
867
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
868
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
869
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
870
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
871
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
872
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
873
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
874
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
875
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
876
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
877
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
878
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
879
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
880
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
881
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
882
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
883
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
884
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
885
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
886
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
887
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
888
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
889
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
890
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
891
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
892
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
893
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
894
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
895
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
896
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
897
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
898
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
899
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
900
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
901
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
902
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
903
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
904
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
905
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
906
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
907
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
908
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
909
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
910
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
911
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
912
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
913
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
914
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
915
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
916
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
917
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
918
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
919
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
920
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
921
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
922
      "\xd6\x86"         => "\xd5\x96",
923
      "\xd6\x85"         => "\xd5\x95",
924
      "\xd6\x84"         => "\xd5\x94",
925
      "\xd6\x83"         => "\xd5\x93",
926
      "\xd6\x82"         => "\xd5\x92",
927
      "\xd6\x81"         => "\xd5\x91",
928
      "\xd6\x80"         => "\xd5\x90",
929
      "\xd5\xbf"         => "\xd5\x8f",
930
      "\xd5\xbe"         => "\xd5\x8e",
931
      "\xd5\xbd"         => "\xd5\x8d",
932
      "\xd5\xbc"         => "\xd5\x8c",
933
      "\xd5\xbb"         => "\xd5\x8b",
934
      "\xd5\xba"         => "\xd5\x8a",
935
      "\xd5\xb9"         => "\xd5\x89",
936
      "\xd5\xb8"         => "\xd5\x88",
937
      "\xd5\xb7"         => "\xd5\x87",
938
      "\xd5\xb6"         => "\xd5\x86",
939
      "\xd5\xb5"         => "\xd5\x85",
940
      "\xd5\xb4"         => "\xd5\x84",
941
      "\xd5\xb3"         => "\xd5\x83",
942
      "\xd5\xb2"         => "\xd5\x82",
943
      "\xd5\xb1"         => "\xd5\x81",
944 2
      "\xd5\xb0"         => "\xd5\x80",
945
      "\xd5\xaf"         => "\xd4\xbf",
946 2
      "\xd5\xae"         => "\xd4\xbe",
947
      "\xd5\xad"         => "\xd4\xbd",
948
      "\xd5\xac"         => "\xd4\xbc",
949
      "\xd5\xab"         => "\xd4\xbb",
950
      "\xd5\xaa"         => "\xd4\xba",
951
      "\xd5\xa9"         => "\xd4\xb9",
952
      "\xd5\xa8"         => "\xd4\xb8",
953
      "\xd5\xa7"         => "\xd4\xb7",
954
      "\xd5\xa6"         => "\xd4\xb6",
955
      "\xd5\xa5"         => "\xd4\xb5",
956
      "\xd5\xa4"         => "\xd4\xb4",
957
      "\xd5\xa3"         => "\xd4\xb3",
958
      "\xd5\xa2"         => "\xd4\xb2",
959
      "\xd5\xa1"         => "\xd4\xb1",
960
      "\xd4\xa5"         => "\xd4\xa4",
961
      "\xd4\xa3"         => "\xd4\xa2",
962
      "\xd4\xa1"         => "\xd4\xa0",
963
      "\xd4\x9f"         => "\xd4\x9e",
964
      "\xd4\x9d"         => "\xd4\x9c",
965
      "\xd4\x9b"         => "\xd4\x9a",
966
      "\xd4\x99"         => "\xd4\x98",
967
      "\xd4\x97"         => "\xd4\x96",
968
      "\xd4\x95"         => "\xd4\x94",
969
      "\xd4\x93"         => "\xd4\x92",
970
      "\xd4\x91"         => "\xd4\x90",
971
      "\xd4\x8f"         => "\xd4\x8e",
972
      "\xd4\x8d"         => "\xd4\x8c",
973
      "\xd4\x8b"         => "\xd4\x8a",
974
      "\xd4\x89"         => "\xd4\x88",
975
      "\xd4\x87"         => "\xd4\x86",
976
      "\xd4\x85"         => "\xd4\x84",
977
      "\xd4\x83"         => "\xd4\x82",
978
      "\xd4\x81"         => "\xd4\x80",
979
      "\xd3\xbf"         => "\xd3\xbe",
980
      "\xd3\xbd"         => "\xd3\xbc",
981
      "\xd3\xbb"         => "\xd3\xba",
982
      "\xd3\xb9"         => "\xd3\xb8",
983
      "\xd3\xb7"         => "\xd3\xb6",
984
      "\xd3\xb5"         => "\xd3\xb4",
985
      "\xd3\xb3"         => "\xd3\xb2",
986
      "\xd3\xb1"         => "\xd3\xb0",
987
      "\xd3\xaf"         => "\xd3\xae",
988
      "\xd3\xad"         => "\xd3\xac",
989
      "\xd3\xab"         => "\xd3\xaa",
990
      "\xd3\xa9"         => "\xd3\xa8",
991
      "\xd3\xa7"         => "\xd3\xa6",
992
      "\xd3\xa5"         => "\xd3\xa4",
993
      "\xd3\xa3"         => "\xd3\xa2",
994
      "\xd3\xa1"         => "\xd3\xa0",
995
      "\xd3\x9f"         => "\xd3\x9e",
996
      "\xd3\x9d"         => "\xd3\x9c",
997
      "\xd3\x9b"         => "\xd3\x9a",
998
      "\xd3\x99"         => "\xd3\x98",
999
      "\xd3\x97"         => "\xd3\x96",
1000
      "\xd3\x95"         => "\xd3\x94",
1001
      "\xd3\x93"         => "\xd3\x92",
1002
      "\xd3\x91"         => "\xd3\x90",
1003
      "\xd3\x8f"         => "\xd3\x80",
1004
      "\xd3\x8e"         => "\xd3\x8d",
1005
      "\xd3\x8c"         => "\xd3\x8b",
1006
      "\xd3\x8a"         => "\xd3\x89",
1007
      "\xd3\x88"         => "\xd3\x87",
1008
      "\xd3\x86"         => "\xd3\x85",
1009
      "\xd3\x84"         => "\xd3\x83",
1010
      "\xd3\x82"         => "\xd3\x81",
1011
      "\xd2\xbf"         => "\xd2\xbe",
1012
      "\xd2\xbd"         => "\xd2\xbc",
1013
      "\xd2\xbb"         => "\xd2\xba",
1014
      "\xd2\xb9"         => "\xd2\xb8",
1015
      "\xd2\xb7"         => "\xd2\xb6",
1016
      "\xd2\xb5"         => "\xd2\xb4",
1017
      "\xd2\xb3"         => "\xd2\xb2",
1018
      "\xd2\xb1"         => "\xd2\xb0",
1019
      "\xd2\xaf"         => "\xd2\xae",
1020
      "\xd2\xad"         => "\xd2\xac",
1021
      "\xd2\xab"         => "\xd2\xaa",
1022
      "\xd2\xa9"         => "\xd2\xa8",
1023
      "\xd2\xa7"         => "\xd2\xa6",
1024
      "\xd2\xa5"         => "\xd2\xa4",
1025
      "\xd2\xa3"         => "\xd2\xa2",
1026
      "\xd2\xa1"         => "\xd2\xa0",
1027
      "\xd2\x9f"         => "\xd2\x9e",
1028
      "\xd2\x9d"         => "\xd2\x9c",
1029
      "\xd2\x9b"         => "\xd2\x9a",
1030
      "\xd2\x99"         => "\xd2\x98",
1031
      "\xd2\x97"         => "\xd2\x96",
1032
      "\xd2\x95"         => "\xd2\x94",
1033
      "\xd2\x93"         => "\xd2\x92",
1034
      "\xd2\x91"         => "\xd2\x90",
1035
      "\xd2\x8f"         => "\xd2\x8e",
1036
      "\xd2\x8d"         => "\xd2\x8c",
1037
      "\xd2\x8b"         => "\xd2\x8a",
1038
      "\xd2\x81"         => "\xd2\x80",
1039
      "\xd1\xbf"         => "\xd1\xbe",
1040
      "\xd1\xbd"         => "\xd1\xbc",
1041
      "\xd1\xbb"         => "\xd1\xba",
1042
      "\xd1\xb9"         => "\xd1\xb8",
1043
      "\xd1\xb7"         => "\xd1\xb6",
1044
      "\xd1\xb5"         => "\xd1\xb4",
1045
      "\xd1\xb3"         => "\xd1\xb2",
1046
      "\xd1\xb1"         => "\xd1\xb0",
1047
      "\xd1\xaf"         => "\xd1\xae",
1048
      "\xd1\xad"         => "\xd1\xac",
1049
      "\xd1\xab"         => "\xd1\xaa",
1050
      "\xd1\xa9"         => "\xd1\xa8",
1051
      "\xd1\xa7"         => "\xd1\xa6",
1052
      "\xd1\xa5"         => "\xd1\xa4",
1053
      "\xd1\xa3"         => "\xd1\xa2",
1054
      "\xd1\xa1"         => "\xd1\xa0",
1055
      "\xd1\x9f"         => "\xd0\x8f",
1056
      "\xd1\x9e"         => "\xd0\x8e",
1057
      "\xd1\x9d"         => "\xd0\x8d",
1058 1
      "\xd1\x9c"         => "\xd0\x8c",
1059
      "\xd1\x9b"         => "\xd0\x8b",
1060 1
      "\xd1\x9a"         => "\xd0\x8a",
1061
      "\xd1\x99"         => "\xd0\x89",
1062
      "\xd1\x98"         => "\xd0\x88",
1063
      "\xd1\x97"         => "\xd0\x87",
1064
      "\xd1\x96"         => "\xd0\x86",
1065
      "\xd1\x95"         => "\xd0\x85",
1066
      "\xd1\x94"         => "\xd0\x84",
1067
      "\xd1\x93"         => "\xd0\x83",
1068
      "\xd1\x92"         => "\xd0\x82",
1069
      "\xd1\x91"         => "\xd0\x81",
1070 16
      "\xd1\x90"         => "\xd0\x80",
1071
      "\xd1\x8f"         => "\xd0\xaf",
1072 16
      "\xd1\x8e"         => "\xd0\xae",
1073
      "\xd1\x8d"         => "\xd0\xad",
1074
      "\xd1\x8c"         => "\xd0\xac",
1075
      "\xd1\x8b"         => "\xd0\xab",
1076
      "\xd1\x8a"         => "\xd0\xaa",
1077
      "\xd1\x89"         => "\xd0\xa9",
1078
      "\xd1\x88"         => "\xd0\xa8",
1079
      "\xd1\x87"         => "\xd0\xa7",
1080
      "\xd1\x86"         => "\xd0\xa6",
1081
      "\xd1\x85"         => "\xd0\xa5",
1082
      "\xd1\x84"         => "\xd0\xa4",
1083
      "\xd1\x83"         => "\xd0\xa3",
1084
      "\xd1\x82"         => "\xd0\xa2",
1085
      "\xd1\x81"         => "\xd0\xa1",
1086
      "\xd1\x80"         => "\xd0\xa0",
1087 31
      "\xd0\xbf"         => "\xd0\x9f",
1088
      "\xd0\xbe"         => "\xd0\x9e",
1089 31
      "\xd0\xbd"         => "\xd0\x9d",
1090
      "\xd0\xbc"         => "\xd0\x9c",
1091 31
      "\xd0\xbb"         => "\xd0\x9b",
1092 3
      "\xd0\xba"         => "\xd0\x9a",
1093
      "\xd0\xb9"         => "\xd0\x99",
1094
      "\xd0\xb8"         => "\xd0\x98",
1095 29
      "\xd0\xb7"         => "\xd0\x97",
1096
      "\xd0\xb6"         => "\xd0\x96",
1097
      "\xd0\xb5"         => "\xd0\x95",
1098
      "\xd0\xb4"         => "\xd0\x94",
1099
      "\xd0\xb3"         => "\xd0\x93",
1100
      "\xd0\xb2"         => "\xd0\x92",
1101
      "\xd0\xb1"         => "\xd0\x91",
1102 29
      "\xd0\xb0"         => "\xd0\x90",
1103
      "\xcf\xbb"         => "\xcf\xba",
1104 29
      "\xcf\xb8"         => "\xcf\xb7",
1105 29
      "\xcf\xb5"         => "\xce\x95",
1106 29
      "\xcf\xb2"         => "\xcf\xb9",
1107 29
      "\xcf\xb1"         => "\xce\xa1",
1108 29
      "\xcf\xb0"         => "\xce\x9a",
1109 29
      "\xcf\xaf"         => "\xcf\xae",
1110
      "\xcf\xad"         => "\xcf\xac",
1111
      "\xcf\xab"         => "\xcf\xaa",
1112 29
      "\xcf\xa9"         => "\xcf\xa8",
1113
      "\xcf\xa7"         => "\xcf\xa6",
1114 27
      "\xcf\xa5"         => "\xcf\xa4",
1115 29
      "\xcf\xa3"         => "\xcf\xa2",
1116
      "\xcf\xa1"         => "\xcf\xa0",
1117 25
      "\xcf\x9f"         => "\xcf\x9e",
1118 25
      "\xcf\x9d"         => "\xcf\x9c",
1119 25
      "\xcf\x9b"         => "\xcf\x9a",
1120 25
      "\xcf\x99"         => "\xcf\x98",
1121 27
      "\xcf\x97"         => "\xcf\x8f",
1122
      "\xcf\x96"         => "\xce\xa0",
1123 11
      "\xcf\x95"         => "\xce\xa6",
1124 11
      "\xcf\x91"         => "\xce\x98",
1125 11
      "\xcf\x90"         => "\xce\x92",
1126 11
      "\xcf\x8e"         => "\xce\x8f",
1127 21
      "\xcf\x8d"         => "\xce\x8e",
1128
      "\xcf\x8c"         => "\xce\x8c",
1129 5
      "\xcf\x8b"         => "\xce\xab",
1130 5
      "\xcf\x8a"         => "\xce\xaa",
1131 5
      "\xcf\x89"         => "\xce\xa9",
1132 5
      "\xcf\x88"         => "\xce\xa8",
1133 11
      "\xcf\x87"         => "\xce\xa7",
1134
      "\xcf\x86"         => "\xce\xa6",
1135
      "\xcf\x85"         => "\xce\xa5",
1136
      "\xcf\x84"         => "\xce\xa4",
1137
      "\xcf\x83"         => "\xce\xa3",
1138
      "\xcf\x82"         => "\xce\xa3",
1139
      "\xcf\x81"         => "\xce\xa1",
1140
      "\xcf\x80"         => "\xce\xa0",
1141
      "\xce\xbf"         => "\xce\x9f",
1142 3
      "\xce\xbe"         => "\xce\x9e",
1143 3
      "\xce\xbd"         => "\xce\x9d",
1144 3
      "\xce\xbc"         => "\xce\x9c",
1145 3
      "\xce\xbb"         => "\xce\x9b",
1146 7
      "\xce\xba"         => "\xce\x9a",
1147
      "\xce\xb9"         => "\xce\x99",
1148 3
      "\xce\xb8"         => "\xce\x98",
1149 3
      "\xce\xb7"         => "\xce\x97",
1150 3
      "\xce\xb6"         => "\xce\x96",
1151 3
      "\xce\xb5"         => "\xce\x95",
1152 3
      "\xce\xb4"         => "\xce\x94",
1153
      "\xce\xb3"         => "\xce\x93",
1154
      "\xce\xb2"         => "\xce\x92",
1155
      "\xce\xb1"         => "\xce\x91",
1156 3
      "\xce\xaf"         => "\xce\x8a",
1157
      "\xce\xae"         => "\xce\x89",
1158 29
      "\xce\xad"         => "\xce\x88",
1159
      "\xce\xac"         => "\xce\x86",
1160
      "\xcd\xbd"         => "\xcf\xbf",
1161 27
      "\xcd\xbc"         => "\xcf\xbe",
1162
      "\xcd\xbb"         => "\xcf\xbd",
1163 25
      "\xcd\xb7"         => "\xcd\xb6",
1164 25
      "\xcd\xb3"         => "\xcd\xb2",
1165 25
      "\xcd\xb1"         => "\xcd\xb0",
1166 25
      "\xca\x92"         => "\xc6\xb7",
1167
      "\xca\x8c"         => "\xc9\x85",
1168
      "\xca\x8b"         => "\xc6\xb2",
1169
      "\xca\x8a"         => "\xc6\xb1",
1170
      "\xca\x89"         => "\xc9\x84",
1171 25
      "\xca\x88"         => "\xc6\xae",
1172
      "\xca\x83"         => "\xc6\xa9",
1173
      "\xca\x80"         => "\xc6\xa6",
1174
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1175
      "\xc9\xb5"         => "\xc6\x9f",
1176
      "\xc9\xb2"         => "\xc6\x9d",
1177 25
      "\xc9\xb1"         => "\xe2\xb1\xae",
1178 25
      "\xc9\xaf"         => "\xc6\x9c",
1179 25
      "\xc9\xab"         => "\xe2\xb1\xa2",
1180 25
      "\xc9\xa9"         => "\xc6\x96",
1181
      "\xc9\xa8"         => "\xc6\x97",
1182 25
      "\xc9\xa5"         => "\xea\x9e\x8d",
1183
      "\xc9\xa3"         => "\xc6\x94",
1184 25
      "\xc9\xa0"         => "\xc6\x93",
1185 25
      "\xc9\x9b"         => "\xc6\x90",
1186 5
      "\xc9\x99"         => "\xc6\x8f",
1187
      "\xc9\x97"         => "\xc6\x8a",
1188
      "\xc9\x96"         => "\xc6\x89",
1189 25
      "\xc9\x94"         => "\xc6\x86",
1190 25
      "\xc9\x93"         => "\xc6\x81",
1191 25
      "\xc9\x92"         => "\xe2\xb1\xb0",
1192 25
      "\xc9\x91"         => "\xe2\xb1\xad",
1193 25
      "\xc9\x90"         => "\xe2\xb1\xaf",
1194
      "\xc9\x8f"         => "\xc9\x8e",
1195
      "\xc9\x8d"         => "\xc9\x8c",
1196
      "\xc9\x8b"         => "\xc9\x8a",
1197
      "\xc9\x89"         => "\xc9\x88",
1198 13
      "\xc9\x87"         => "\xc9\x86",
1199
      "\xc9\x82"         => "\xc9\x81",
1200
      "\xc9\x80"         => "\xe2\xb1\xbf",
1201 29
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1202
      "\xc8\xbc"         => "\xc8\xbb",
1203 11
      "\xc8\xb3"         => "\xc8\xb2",
1204
      "\xc8\xb1"         => "\xc8\xb0",
1205
      "\xc8\xaf"         => "\xc8\xae",
1206
      "\xc8\xad"         => "\xc8\xac",
1207
      "\xc8\xab"         => "\xc8\xaa",
1208
      "\xc8\xa9"         => "\xc8\xa8",
1209
      "\xc8\xa7"         => "\xc8\xa6",
1210
      "\xc8\xa5"         => "\xc8\xa4",
1211
      "\xc8\xa3"         => "\xc8\xa2",
1212
      "\xc8\x9f"         => "\xc8\x9e",
1213
      "\xc8\x9d"         => "\xc8\x9c",
1214
      "\xc8\x9b"         => "\xc8\x9a",
1215
      "\xc8\x99"         => "\xc8\x98",
1216
      "\xc8\x97"         => "\xc8\x96",
1217
      "\xc8\x95"         => "\xc8\x94",
1218 6
      "\xc8\x93"         => "\xc8\x92",
1219
      "\xc8\x91"         => "\xc8\x90",
1220 6
      "\xc8\x8f"         => "\xc8\x8e",
1221
      "\xc8\x8d"         => "\xc8\x8c",
1222
      "\xc8\x8b"         => "\xc8\x8a",
1223
      "\xc8\x89"         => "\xc8\x88",
1224 6
      "\xc8\x87"         => "\xc8\x86",
1225
      "\xc8\x85"         => "\xc8\x84",
1226
      "\xc8\x83"         => "\xc8\x82",
1227
      "\xc8\x81"         => "\xc8\x80",
1228
      "\xc7\xbf"         => "\xc7\xbe",
1229
      "\xc7\xbd"         => "\xc7\xbc",
1230
      "\xc7\xbb"         => "\xc7\xba",
1231
      "\xc7\xb9"         => "\xc7\xb8",
1232
      "\xc7\xb5"         => "\xc7\xb4",
1233
      "\xc7\xb3"         => "\xc7\xb2",
1234
      "\xc7\xaf"         => "\xc7\xae",
1235
      "\xc7\xad"         => "\xc7\xac",
1236
      "\xc7\xab"         => "\xc7\xaa",
1237
      "\xc7\xa9"         => "\xc7\xa8",
1238
      "\xc7\xa7"         => "\xc7\xa6",
1239
      "\xc7\xa5"         => "\xc7\xa4",
1240
      "\xc7\xa3"         => "\xc7\xa2",
1241
      "\xc7\xa1"         => "\xc7\xa0",
1242
      "\xc7\x9f"         => "\xc7\x9e",
1243
      "\xc7\x9d"         => "\xc6\x8e",
1244
      "\xc7\x9c"         => "\xc7\x9b",
1245
      "\xc7\x9a"         => "\xc7\x99",
1246
      "\xc7\x98"         => "\xc7\x97",
1247 37
      "\xc7\x96"         => "\xc7\x95",
1248
      "\xc7\x94"         => "\xc7\x93",
1249 37
      "\xc7\x92"         => "\xc7\x91",
1250
      "\xc7\x90"         => "\xc7\x8f",
1251 37
      "\xc7\x8e"         => "\xc7\x8d",
1252
      "\xc7\x8c"         => "\xc7\x8b",
1253 37
      "\xc7\x89"         => "\xc7\x88",
1254 9
      "\xc7\x86"         => "\xc7\x85",
1255
      "\xc6\xbf"         => "\xc7\xb7",
1256
      "\xc6\xbd"         => "\xc6\xbc",
1257
      "\xc6\xb9"         => "\xc6\xb8",
1258 35
      "\xc6\xb6"         => "\xc6\xb5",
1259
      "\xc6\xb4"         => "\xc6\xb3",
1260 35
      "\xc6\xb0"         => "\xc6\xaf",
1261
      "\xc6\xad"         => "\xc6\xac",
1262
      "\xc6\xa8"         => "\xc6\xa7",
1263
      "\xc6\xa5"         => "\xc6\xa4",
1264 1
      "\xc6\xa3"         => "\xc6\xa2",
1265 1
      "\xc6\xa1"         => "\xc6\xa0",
1266
      "\xc6\x9e"         => "\xc8\xa0",
1267 35
      "\xc6\x9a"         => "\xc8\xbd",
1268 21
      "\xc6\x99"         => "\xc6\x98",
1269 21
      "\xc6\x95"         => "\xc7\xb6",
1270 31
      "\xc6\x92"         => "\xc6\x91",
1271
      "\xc6\x8c"         => "\xc6\x8b",
1272
      "\xc6\x88"         => "\xc6\x87",
1273 35
      "\xc6\x85"         => "\xc6\x84",
1274
      "\xc6\x83"         => "\xc6\x82",
1275
      "\xc6\x80"         => "\xc9\x83",
1276 35
      "\xc5\xbf"         => "\x53",
1277 1
      "\xc5\xbe"         => "\xc5\xbd",
1278 1
      "\xc5\xbc"         => "\xc5\xbb",
1279
      "\xc5\xba"         => "\xc5\xb9",
1280 35
      "\xc5\xb7"         => "\xc5\xb6",
1281
      "\xc5\xb5"         => "\xc5\xb4",
1282
      "\xc5\xb3"         => "\xc5\xb2",
1283
      "\xc5\xb1"         => "\xc5\xb0",
1284
      "\xc5\xaf"         => "\xc5\xae",
1285
      "\xc5\xad"         => "\xc5\xac",
1286
      "\xc5\xab"         => "\xc5\xaa",
1287
      "\xc5\xa9"         => "\xc5\xa8",
1288
      "\xc5\xa7"         => "\xc5\xa6",
1289
      "\xc5\xa5"         => "\xc5\xa4",
1290
      "\xc5\xa3"         => "\xc5\xa2",
1291
      "\xc5\xa1"         => "\xc5\xa0",
1292
      "\xc5\x9f"         => "\xc5\x9e",
1293
      "\xc5\x9d"         => "\xc5\x9c",
1294
      "\xc5\x9b"         => "\xc5\x9a",
1295
      "\xc5\x99"         => "\xc5\x98",
1296
      "\xc5\x97"         => "\xc5\x96",
1297
      "\xc5\x95"         => "\xc5\x94",
1298
      "\xc5\x93"         => "\xc5\x92",
1299
      "\xc5\x91"         => "\xc5\x90",
1300
      "\xc5\x8f"         => "\xc5\x8e",
1301
      "\xc5\x8d"         => "\xc5\x8c",
1302
      "\xc5\x8b"         => "\xc5\x8a",
1303
      "\xc5\x88"         => "\xc5\x87",
1304
      "\xc5\x86"         => "\xc5\x85",
1305
      "\xc5\x84"         => "\xc5\x83",
1306
      "\xc5\x82"         => "\xc5\x81",
1307
      "\xc5\x80"         => "\xc4\xbf",
1308
      "\xc4\xbe"         => "\xc4\xbd",
1309
      "\xc4\xbc"         => "\xc4\xbb",
1310
      "\xc4\xba"         => "\xc4\xb9",
1311
      "\xc4\xb7"         => "\xc4\xb6",
1312
      "\xc4\xb5"         => "\xc4\xb4",
1313
      "\xc4\xb3"         => "\xc4\xb2",
1314
      "\xc4\xb1"         => "\x49",
1315
      "\xc4\xaf"         => "\xc4\xae",
1316
      "\xc4\xad"         => "\xc4\xac",
1317
      "\xc4\xab"         => "\xc4\xaa",
1318
      "\xc4\xa9"         => "\xc4\xa8",
1319 63
      "\xc4\xa7"         => "\xc4\xa6",
1320
      "\xc4\xa5"         => "\xc4\xa4",
1321 63
      "\xc4\xa3"         => "\xc4\xa2",
1322
      "\xc4\xa1"         => "\xc4\xa0",
1323 63
      "\xc4\x9f"         => "\xc4\x9e",
1324 4
      "\xc4\x9d"         => "\xc4\x9c",
1325
      "\xc4\x9b"         => "\xc4\x9a",
1326
      "\xc4\x99"         => "\xc4\x98",
1327
      "\xc4\x97"         => "\xc4\x96",
1328 62
      "\xc4\x95"         => "\xc4\x94",
1329
      "\xc4\x93"         => "\xc4\x92",
1330
      "\xc4\x91"         => "\xc4\x90",
1331 62
      "\xc4\x8f"         => "\xc4\x8e",
1332
      "\xc4\x8d"         => "\xc4\x8c",
1333
      "\xc4\x8b"         => "\xc4\x8a",
1334
      "\xc4\x89"         => "\xc4\x88",
1335 62
      "\xc4\x87"         => "\xc4\x86",
1336
      "\xc4\x85"         => "\xc4\x84",
1337
      "\xc4\x83"         => "\xc4\x82",
1338 62
      "\xc4\x81"         => "\xc4\x80",
1339
      "\xc3\xbf"         => "\xc5\xb8",
1340
      "\xc3\xbe"         => "\xc3\x9e",
1341 62
      "\xc3\xbd"         => "\xc3\x9d",
1342
      "\xc3\xbc"         => "\xc3\x9c",
1343
      "\xc3\xbb"         => "\xc3\x9b",
1344
      "\xc3\xba"         => "\xc3\x9a",
1345
      "\xc3\xb9"         => "\xc3\x99",
1346
      "\xc3\xb8"         => "\xc3\x98",
1347
      "\xc3\xb6"         => "\xc3\x96",
1348
      "\xc3\xb5"         => "\xc3\x95",
1349
      "\xc3\xb4"         => "\xc3\x94",
1350
      "\xc3\xb3"         => "\xc3\x93",
1351
      "\xc3\xb2"         => "\xc3\x92",
1352
      "\xc3\xb1"         => "\xc3\x91",
1353 24
      "\xc3\xb0"         => "\xc3\x90",
1354
      "\xc3\xaf"         => "\xc3\x8f",
1355 24
      "\xc3\xae"         => "\xc3\x8e",
1356
      "\xc3\xad"         => "\xc3\x8d",
1357 24
      "\xc3\xac"         => "\xc3\x8c",
1358 5
      "\xc3\xab"         => "\xc3\x8b",
1359
      "\xc3\xaa"         => "\xc3\x8a",
1360
      "\xc3\xa9"         => "\xc3\x89",
1361
      "\xc3\xa8"         => "\xc3\x88",
1362 23
      "\xc3\xa7"         => "\xc3\x87",
1363 23
      "\xc3\xa6"         => "\xc3\x86",
1364 23
      "\xc3\xa5"         => "\xc3\x85",
1365
      "\xc3\xa4"         => "\xc3\x84",
1366 23
      "\xc3\xa3"         => "\xc3\x83",
1367
      "\xc3\xa2"         => "\xc3\x82",
1368 23
      "\xc3\xa1"         => "\xc3\x81",
1369
      "\xc3\xa0"         => "\xc3\x80",
1370
      "\xc2\xb5"         => "\xce\x9c",
1371
      "\x7a"             => "\x5a",
1372 23
      "\x79"             => "\x59",
1373 23
      "\x78"             => "\x58",
1374 23
      "\x77"             => "\x57",
1375 23
      "\x76"             => "\x56",
1376 23
      "\x75"             => "\x55",
1377
      "\x74"             => "\x54",
1378 23
      "\x73"             => "\x53",
1379
      "\x72"             => "\x52",
1380
      "\x71"             => "\x51",
1381
      "\x70"             => "\x50",
1382
      "\x6f"             => "\x4f",
1383
      "\x6e"             => "\x4e",
1384
      "\x6d"             => "\x4d",
1385
      "\x6c"             => "\x4c",
1386
      "\x6b"             => "\x4b",
1387
      "\x6a"             => "\x4a",
1388
      "\x69"             => "\x49",
1389
      "\x68"             => "\x48",
1390
      "\x67"             => "\x47",
1391
      "\x66"             => "\x46",
1392
      "\x65"             => "\x45",
1393
      "\x64"             => "\x44",
1394
      "\x63"             => "\x43",
1395
      "\x62"             => "\x42",
1396
      "\x61"             => "\x41",
1397
1398
    );
1399
1400
    return $case;
1401
  }
1402
1403
  /**
   * Probe the optional extensions / PCRE features once and cache the
   * results in self::$support.
   *
   * The "mbstring" entry doubles as the "already probed" marker: when it is
   * set, the method returns without touching the cache again.
   */
  public static function checkForSupport()
  {
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1416
1417
  /**
   * Build the UTF-8 character that belongs to a code point.
   *
   * An integer is used as-is. Any other value is handed to
   * self::hex_to_int() and the result is cast to int.
   *
   * @param    int|string $code_point The code point, as an integer or in a form
   *                                  self::hex_to_int() understands.
   *
   * @return   string The character produced by decoding the numeric entity "&#N;",
   *                  or an empty string when a non-integer input resolves to 0.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $decimal = (int)$code_point;

    // anything that is not already a plain integer goes through the hex parser
    if ($decimal !== $code_point) {
      $decimal = (int)self::hex_to_int($code_point);

      if (!$decimal) {
        return '';
      }
    }

    return self::html_entity_decode("&#{$decimal};", ENT_QUOTES);
  }
1437 30
1438
  /**
   * Run a callback on every character of a string.
   *
   * The string is cut into pieces with self::split() and each piece is
   * passed to the callback through array_map().
   *
   * @param    callable $callback The callback, invoked once per character.
   * @param    string   $str      UTF-8 string to run the callback on.
   *
   * @return   array The callback results, one entry per character.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1453
1454 16
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character; an empty array
   *                 for empty input.
   */
  public static function chr_size_list($str)
  {
    // "0" is falsy in PHP but is a real one-character string, so it must
    // not be short-circuited together with the genuinely empty values.
    if (!$str && $str !== '0') {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1474
1475 17
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the character (1 to 4);
   * the payload bits of the lead byte and of each following byte are then
   * concatenated, 6 bits per following byte.
   *
   * @param   string $chr The input character (only the first character is decoded).
   *
   * @return  int The decoded value; 0 for an empty string.
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    // nothing to decode: avoid reading the undefined offset 0
    if (!isset($chr[0])) {
      return 0;
    }

    $code = self::ord($chr[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx - a byte missing from a truncated sequence contributes zero bits
      $byte = isset($chr[$i]) ? self::ord($chr[$i]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
1514 1
1515
  /**
   * Format the code point of a UTF-8 encoded character as hexadecimal text.
   *
   * @param    string $chr  The input character.
   * @param    string $pfix Prefix handed to self::int_to_hex(), "U+" by default.
   *
   * @return   string The value returned by self::int_to_hex() for the character's code point.
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $codePoint = self::ord($chr);

    return self::int_to_hex($codePoint, $pfix);
  }
1527 1
1528
  /**
   * Cut a string into chunks and glue them back together with a line ending.
   *
   * The chunks come from self::split(); $end is placed between two chunks
   * only, so nothing is appended after the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The chunk length handed to self::split().
   * @param    string $end      The character(s) inserted between the chunks.
   *
   * @return   string The chunked string.
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1542 5
1543
  /**
   * Accepts a string and removes all non-UTF-8 characters from it.
   *
   * Processing order: strip invalid byte sequences, drop the replacement
   * character via self::replace_diamond_question_mark(), run
   * self::remove_invisible_characters(), then apply the optional steps.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              Pass true to strip the BOM via self::removeBOM().
   * @param bool   $normalize_whitespace    Pass true to run self::normalize_whitespace().
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Valid sequences are captured in $1 and kept; every other byte matches
    // the trailing "." and is replaced by the (then empty) $1.
    // The four-byte alternatives keep U+10000 - U+10FFFF (e.g. emoji) intact.
    $regx = '/
       (
        (?: [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
        |   [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
        |   \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xE1-\xEC][\x80-\xBF]{2}
        |   \xED[\x80-\x9F][\x80-\xBF]
        |   [\xEE-\xEF][\x80-\xBF]{2}
        |   \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences    11110xxx 10xxxxxx * 3
        |   [\xF1-\xF3][\x80-\xBF]{3}
        |   \xF4[\x80-\x8F][\x80-\xBF]{2}
        ){1,50}                          # ...one or more times
       )
       | .                               # anything else
       /x';
    $str = preg_replace($regx, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
1590
1591
  /**
   * Clean up a string so that only printable UTF-8 characters remain.
   *
   * Runs self::fix_simple_utf8() first and then self::clean() with BOM
   * removal and whitespace normalization switched on (non-breaking spaces
   * are kept, MS Word normalization is off).
   *
   * @param string|false $str
   *
   * @return string The cleaned string; an empty string for empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    // repair simple ISO <-> UTF-8 mix-ups before sanitizing
    $repaired = self::fix_simple_utf8($str);

    // clean($str, $remove_bom, $normalize_whitespace, $normalize_msword, $keep_non_breaking_space)
    return (string)self::clean($repaired, true, true, false, true);
  }
1621 2
1622
  /**
   * Turn a string (or a list of characters) into a list of code points.
   *
   * A string is first cut into characters with self::split(); any other
   * value is passed to array_map() unchanged.
   *
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool  $u_style When truthy, every code point is additionally run
   *                          through int_to_hex(); otherwise the values returned
   *                          by ord() are kept.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $toCodePoint = array('\\voku\\helper\\UTF8', 'ord');
    $toHex = array('\\voku\\helper\\UTF8', 'int_to_hex');

    $chars = is_string($arg) ? self::split($arg) : $arg;
    $points = array_map($toCodePoint, $chars);

    if (!$u_style) {
      return $points;
    }

    return array_map($toHex, $points);
  }
1657 1
1658
  /**
   * Count how often each character occurs in a string.
   *
   * Unlike the native count_chars() there is no $mode parameter.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array with the characters as keys and
   *           their number of occurrences as values, sorted by key (ksort).
   */
  public static function count_chars($str)
  {
    $chars = self::split($str);
    $counts = array_count_values($chars);

    ksort($counts);

    return $counts;
  }
1674
1675
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity ("&#x...;") which
   * mb_convert_encoding() then converts from "HTML-ENTITIES" to UTF-8.
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
1692 5
1693
  /**
   * Encode to UTF8 or LATIN1.
   *
   * INFO:  In contrast to "UTF8::utf8_encode()" this function also tries to fix broken / double encoding,
   *        so it can be called on a string that already is UTF-8 without messing it up.
   *
   * The label is normalized with self::normalizeEncoding() before it is compared.
   *
   * @param string $encodingLabel ISO-8859-1 || UTF-8
   * @param string $str
   *
   * @return false|string Will return false on error (any other encoding label).
   */
  public static function encode($encodingLabel, $str)
  {
    $label = self::normalizeEncoding($encodingLabel);

    if ($label === 'UTF-8') {
      return self::to_utf8($str);
    }

    return ($label === 'ISO-8859-1') ? self::to_latin1($str) : false;
  }
1718 1
1719
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use the UTF-8 option for binary files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string   $filename      Name of the file to read.
   * @param int      $flags         [optional] Passed through to the native file_get_contents().
   *                                Prior to PHP 6, this parameter is called use_include_path and is a bool.
   *                                As of PHP 5 the FILE_USE_INCLUDE_PATH can be used to trigger include path search.
   *                                The value can be any combination of the following flags (with some
   *                                restrictions), joined with the binary OR (|) operator:
   *                                - FILE_USE_INCLUDE_PATH: Search for filename in the include directory.
   *                                  See include_path for more information.
   *                                - FILE_TEXT: As of PHP 6, the default encoding of the read data is UTF-8.
   *                                  You can specify a different encoding by creating a custom context or by
   *                                  changing the default using stream_default_encoding. This flag cannot be
   *                                  used with FILE_BINARY.
   *                                - FILE_BINARY: With this flag, the file is read in binary mode. This is the
   *                                  default setting and cannot be used with FILE_TEXT.
   * @param resource $context       [optional] A valid context resource created with stream_context_create.
   *                                If you don't need to use a custom context, you can skip this parameter by null.
   *                                When it is null and $timeout is non-zero, a context carrying the HTTP
   *                                timeout is created.
   * @param int      $offset        [optional] The offset where the reading starts.
   * @param int      $maxlen        [optional] Maximum length of data read. The default is to read until end
   *                                of file is reached. Only forwarded when it is an integer.
   * @param int      $timeout       HTTP timeout for the generated stream context; 0 disables it.
   *
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non
   *                                default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and rewrites
    // names containing quotes or "<" - kept unchanged for backward compatibility.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    if ($timeout !== 0 && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // the detection yields string|false: compare strictly so that an empty
      // string or false can never be mistaken for a usable encoding name
      $encoding = self::str_detect_encoding($data);
      if ($encoding !== false && $encoding !== '' && $encoding !== 'UTF-8') {
        $data = mb_convert_encoding(
            $data,
            'UTF-8',
            self::normalizeEncoding($encoding)
        );
      }

      // clean utf-8 string
      $data = self::cleanup($data);
    }

    return $data;
  }
1848 2
1849
  /**
   * Checks if a file starts with BOM character.
   *
   * Only the first three bytes of the file are read and handed to self::is_bom().
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (including when the file cannot be read).
   */
  public static function file_has_bom($file_path)
  {
    // Read from offset 0: since PHP 7.1 a negative offset is counted from the
    // END of the stream, so "-1" would fetch the last byte instead of the BOM.
    $head = file_get_contents($file_path, false, null, 0, 3);

    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
1860
1861 14
  /**
   * Normalizes to UTF-8 NFC, converting from CP-1252 when needed.
   *
   * Arrays and objects are walked recursively (objects are modified in place,
   * property by property). For strings, "\r\n" and "\r" become "\n"; strings
   * containing bytes >= 0x80 are normalized through Normalizer, falling back
   * to self::encode('UTF-8', ...) when normalization yields nothing. All other
   * types are returned untouched.
   *
   * @param mixed  $var
   * @param int    $normalization_form Form passed to Normalizer (default 4 - presumably NFC
   *                                   on the targeted intl version; TODO confirm)
   * @param string $leading_combining  Prepended when the result starts with a combining mark (\p{Mn})
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure 7-bit strings need no normalization
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker: the string was already normalized
      $n = '-';
    } else {
      $n = Normalizer::normalize($var, $normalization_form);
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    if ($var[0] >= "\x80" && isset($n[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1913 20
1914 20
  /**
   * "filter_input()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * $option is only forwarded to the native function when the caller
   * actually supplied a fourth argument.
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param mixed  $option
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $value = (4 > func_num_args())
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($value);
  }
1934 14
1935 8
  /**
   * "filter_input_array()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * $definition and $add_empty are only forwarded to the native function
   * when the caller supplied at least two arguments.
   *
   * @param int   $type
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    $values = (2 > func_num_args())
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1954 6
1955
  /**
   * "filter_var()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * $option is only forwarded to the native function when the caller
   * actually supplied a third argument.
   *
   * @param mixed $var
   * @param int   $filter
   * @param mixed $option
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    $value = (3 > func_num_args())
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($value);
  }
1974
1975
  /**
   * "filter_var_array()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * $definition and $add_empty are only forwarded to the native function
   * when the caller supplied at least two arguments.
   *
   * @param array $data
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    $values = (2 > func_num_args())
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1994
1995
  /**
   * Tell whether a string has at most the given number of characters,
   * measured with self::strlen().
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The size in number of chars to be checked against string.
   *
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2008
2009
  /**
   * Repair a broken UTF-8 string by plain search & replace.
   *
   * Every key of self::$brokenUtf8ToUtf8 found in the string is replaced by
   * its value. The key and value lists are extracted once and cached in
   * static variables for later calls.
   *
   * @param string $str
   *
   * @return string The repaired string; an empty string for empty input.
   */
  public static function fix_simple_utf8($str)
  {
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2034
2035
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively). A single value is
   * decoded and re-encoded again and again until a pass no longer changes it
   * (loose comparison).
   *
   * @param array|string $str
   *
   * @return array|string The fixed value, an array when an array was passed in.
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {

      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    $previous = '';
    while ($previous != $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
2062
2063
  /**
   * Determine the writing direction of a single character.
   *
   * The character is converted to its code point with chr_to_decimal() and
   * then looked up in the right-to-left code points and ranges listed below.
   *
   * @param   string $chr Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($chr)
  {
    // Single right-to-left code points; matched by strict identity.
    static $rtlPoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // Inclusive [first, last] right-to-left ranges.
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    $c = static::chr_to_decimal($chr);

    // Nothing below 0x5be or above 0x10b7f is listed as right-to-left.
    if ($c < 0x5be || $c > 0x10b7f) {
      return 'LTR';
    }

    if (in_array($c, $rtlPoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2162
2163
  /**
   * Load the unserialized content of "data/<$file>.ser", located next to this file.
   *
   * @param string $file File name without directory and without the ".ser" extension.
   *
   * @return bool|string|array|int The unserialized data, or false if the file does not exist.
   */
  protected static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.ser';

    if (!file_exists($path)) {
      return false;
    }

    return unserialize(file_get_contents($path));
  }
2179
2180
  /**
   * Creates a random string of UTF-8 characters.
   *
   * The alphabet is built once and cached in a static variable. Characters
   * are picked with mt_rand(), so the result is not suitable for anything
   * security-related.
   *
   * @param    int $len The length of string in characters; it is cast to int.
   *
   * @return   string String consisting of random characters, '' if $len is not positive.
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // Force an integer: with a fractional length the former countdown
    // "for (; $len; --$len)" stepped past zero and never terminated.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (empty($chars)) {
      if (self::$support['pcre_utf8'] === true) {
        $candidates = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // Blank out everything that is neither a number nor a letter ...
        $candidates = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $candidates);

        // ... and keep the non-empty entries. A bare array_filter() would
        // also throw away the digit "0", because "0" is falsy in PHP.
        $chars = array();
        foreach ((array)$candidates as $candidate) {
          if (is_string($candidate) && $candidate !== '') {
            $chars[] = $candidate;
          }
        }
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for ($i = 0; $i < $len; ++$i) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
2227 2
2228
  /**
   * Converts hexadecimal U+xxxx code point representation to Integer.
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * Accepted forms (case-insensitive): 4 to 6 hexadecimal digits, optionally
   * prefixed with "U+" or a backslash followed by "u".
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits are allowed: with "[a-z0-9]" an input such as
    // "abgh" matched and intval() silently converted just the leading "ab".
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2245 1
2246
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * e.g.: &#123;&#39;&#1740;
   *
   * Every element returned by split() is passed to single_chr_html_encode()
   * and the results are joined without a separator.
   *
   * @param  string $str The Unicode string to be encoded as numbered entities.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str)
  {
    $encoder = array(
        '\\voku\\helper\\UTF8',
        'single_chr_html_encode',
    );

    $entities = array_map($encoder, self::split($str));

    return implode($entities);
  }
2267 3
2268
  /**
2269
   * UTF-8 version of html_entity_decode()
2270 3
   *
2271
   * The reason we are not using html_entity_decode() by itself is because
2272 3
   * while it is not technically correct to leave out the semicolon
2273 3
   * at the end of an entity most browsers will still interpret the entity
2274 3
   * correctly. html_entity_decode() does not convert entities without
2275 3
   * semicolons, so we are left with our own little solution here. Bummer.
2276 2
   *
2277
   * Convert all HTML entities to their applicable characters
2278 3
   *
2279
   * @link http://php.net/manual/en/function.html-entity-decode.php
2280
   *
2281
   * @param string $str      <p>
2282
   *                         The input string.
2283
   *                         </p>
2284
   * @param int    $flags    [optional] <p>
2285
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2286
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2287
   *                         <table>
2288
   *                         Available <i>flags</i> constants
2289 1
   *                         <tr valign="top">
2290
   *                         <td>Constant Name</td>
2291 1
   *                         <td>Description</td>
2292 1
   *                         </tr>
2293
   *                         <tr valign="top">
2294 1
   *                         <td><b>ENT_COMPAT</b></td>
2295 1
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2296 1
   *                         </tr>
2297 1
   *                         <tr valign="top">
2298 1
   *                         <td><b>ENT_QUOTES</b></td>
2299 1
   *                         <td>Will convert both double and single quotes.</td>
2300 1
   *                         </tr>
2301 1
   *                         <tr valign="top">
2302 1
   *                         <td><b>ENT_NOQUOTES</b></td>
2303 1
   *                         <td>Will leave both double and single quotes unconverted.</td>
2304 1
   *                         </tr>
2305 1
   *                         <tr valign="top">
2306 1
   *                         <td><b>ENT_HTML401</b></td>
2307 1
   *                         <td>
2308
   *                         Handle code as HTML 4.01.
2309 1
   *                         </td>
2310 1
   *                         </tr>
2311 1
   *                         <tr valign="top">
2312 1
   *                         <td><b>ENT_XML1</b></td>
2313 1
   *                         <td>
2314 1
   *                         Handle code as XML 1.
2315 1
   *                         </td>
2316 1
   *                         </tr>
2317 1
   *                         <tr valign="top">
2318 1
   *                         <td><b>ENT_XHTML</b></td>
2319 1
   *                         <td>
2320 1
   *                         Handle code as XHTML.
2321 1
   *                         </td>
2322 1
   *                         </tr>
2323
   *                         <tr valign="top">
2324 1
   *                         <td><b>ENT_HTML5</b></td>
2325 1
   *                         <td>
2326 1
   *                         Handle code as HTML 5.
2327
   *                         </td>
2328 1
   *                         </tr>
2329
   *                         </table>
2330
   *                         </p>
2331
   * @param string $encoding [optional] <p>
2332 1
   *                         Encoding to use.
2333
   *                         </p>
2334 1
   *
2335
   * @return string the decoded string.
2336
   */
2337
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Nothing to do for empty input or input without any "&".
    if ($str === '') {
      return '';
    }

    if (strpos($str, '&') === false) {
      return $str;
    }

    // Default flags: ENT_HTML5 is only added when Bootup::is_php('5.4') reports true.
    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    $entityCallback = array('\voku\helper\UTF8', 'entityCallback');

    // Decode repeatedly until a full pass no longer changes the string.
    while (true) {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", $entityCallback, $str);

      // decode numeric & UTF16 two byte entities:
      // numeric entities lacking the trailing ";" get one appended first
      $completed = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      $str = html_entity_decode($completed, $flags, $encoding);

      if ($before === $str) {
        break;
      }
    }

    return $str;
  }
2373 1
2374 1
  /**
   * Callback for preg_replace_callback(): converts the matched HTML entity
   * to UTF-8 via mb_convert_encoding().
   *
   * @param  array $matches PREG matches; only $matches[0] is read.
   *
   * @return string The converted text; a decoded apostrophe is returned as '&#x27;'.
   */
  protected static function entityCallback(&$matches)
  {
    self::checkForSupport();

    $decoded = mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    // A plain single quote is handed back as an entity instead of the raw character.
    return ($decoded === "'") ? '&#x27;' : $decoded;
  }
2393
2394
  /**
2395
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2396 1
   *
2397
   * @link http://php.net/manual/en/function.htmlentities.php
2398
   *
2399
   * @param string $str           <p>
2400
   *                              The input string.
2401
   *                              </p>
2402
   * @param int    $flags         [optional] <p>
2403
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2404 1
   *                              invalid code unit sequences and the used document type. The default is
2405
   *                              ENT_COMPAT | ENT_HTML401.
2406 1
   *                              <table>
2407
   *                              Available <i>flags</i> constants
2408
   *                              <tr valign="top">
2409
   *                              <td>Constant Name</td>
2410
   *                              <td>Description</td>
2411
   *                              </tr>
2412
   *                              <tr valign="top">
2413
   *                              <td><b>ENT_COMPAT</b></td>
2414
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2415
   *                              </tr>
2416 5
   *                              <tr valign="top">
2417
   *                              <td><b>ENT_QUOTES</b></td>
2418 5
   *                              <td>Will convert both double and single quotes.</td>
2419
   *                              </tr>
2420 5
   *                              <tr valign="top">
2421
   *                              <td><b>ENT_NOQUOTES</b></td>
2422
   *                              <td>Will leave both double and single quotes unconverted.</td>
2423
   *                              </tr>
2424
   *                              <tr valign="top">
2425 5
   *                              <td><b>ENT_IGNORE</b></td>
2426
   *                              <td>
2427
   *                              Silently discard invalid code unit sequences instead of returning
2428 5
   *                              an empty string. Using this flag is discouraged as it
2429
   *                              may have security implications.
2430
   *                              </td>
2431
   *                              </tr>
2432
   *                              <tr valign="top">
2433
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2434
   *                              <td>
2435 5
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2436
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2437 5
   *                              </td>
2438
   *                              </tr>
2439
   *                              <tr valign="top">
2440
   *                              <td><b>ENT_DISALLOWED</b></td>
2441
   *                              <td>
2442
   *                              Replace invalid code points for the given document type with a
2443
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2444
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2445
   *                              instance, to ensure the well-formedness of XML documents with
2446
   *                              embedded external content.
2447
   *                              </td>
2448
   *                              </tr>
2449
   *                              <tr valign="top">
2450
   *                              <td><b>ENT_HTML401</b></td>
2451
   *                              <td>
2452
   *                              Handle code as HTML 4.01.
2453
   *                              </td>
2454
   *                              </tr>
2455
   *                              <tr valign="top">
2456
   *                              <td><b>ENT_XML1</b></td>
2457
   *                              <td>
2458
   *                              Handle code as XML 1.
2459
   *                              </td>
2460
   *                              </tr>
2461
   *                              <tr valign="top">
2462
   *                              <td><b>ENT_XHTML</b></td>
2463
   *                              <td>
2464
   *                              Handle code as XHTML.
2465
   *                              </td>
2466
   *                              </tr>
2467
   *                              <tr valign="top">
2468
   *                              <td><b>ENT_HTML5</b></td>
2469
   *                              <td>
2470
   *                              Handle code as HTML 5.
2471
   *                              </td>
2472
   *                              </tr>
2473
   *                              </table>
2474
   *                              </p>
2475
   * @param string $encoding      [optional] <p>
2476
   *                              Like <b>htmlspecialchars</b>,
2477
   *                              <b>htmlentities</b> takes an optional third argument
2478
   *                              <i>encoding</i> which defines encoding used in
2479
   *                              conversion.
2480
   *                              Although this argument is technically optional, you are highly
2481
   *                              encouraged to specify the correct value for your code.
2482
   *                              </p>
2483
   * @param bool   $double_encode [optional] <p>
2484
   *                              When <i>double_encode</i> is turned off PHP will not
2485
   *                              encode existing html entities. The default is to convert everything.
2486
   *                              </p>
2487
   *
2488 1
   *
2489
   * @return string the encoded string.
2490 1
   * </p>
2491
   * <p>
2492 1
   * If the input <i>string</i> contains an invalid code unit
2493
   * sequence within the given <i>encoding</i> an empty string
2494
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2495
   * <b>ENT_SUBSTITUTE</b> flags are set.
2496
   */
2497
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Thin wrapper: all four arguments are forwarded unchanged to PHP's native htmlentities().
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
2501
2502
  /**
2503
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
2504
   *
2505
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2506
   *
2507
   * @param string $str           <p>
2508
   *                              The string being converted.
2509
   *                              </p>
2510
   * @param int    $flags         [optional] <p>
2511
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2512
   *                              invalid code unit sequences and the used document type. The default is
2513
   *                              ENT_COMPAT | ENT_HTML401.
2514
   *                              <table>
2515
   *                              Available <i>flags</i> constants
2516
   *                              <tr valign="top">
2517
   *                              <td>Constant Name</td>
2518
   *                              <td>Description</td>
2519
   *                              </tr>
2520
   *                              <tr valign="top">
2521
   *                              <td><b>ENT_COMPAT</b></td>
2522
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2523 1
   *                              </tr>
2524
   *                              <tr valign="top">
2525 1
   *                              <td><b>ENT_QUOTES</b></td>
2526
   *                              <td>Will convert both double and single quotes.</td>
2527 1
   *                              </tr>
2528
   *                              <tr valign="top">
2529
   *                              <td><b>ENT_NOQUOTES</b></td>
2530
   *                              <td>Will leave both double and single quotes unconverted.</td>
2531
   *                              </tr>
2532
   *                              <tr valign="top">
2533
   *                              <td><b>ENT_IGNORE</b></td>
2534
   *                              <td>
2535
   *                              Silently discard invalid code unit sequences instead of returning
2536
   *                              an empty string. Using this flag is discouraged as it
2537
   *                              may have security implications.
2538
   *                              </td>
2539 1
   *                              </tr>
2540
   *                              <tr valign="top">
2541 1
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2542 1
   *                              <td>
2543 1
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2544 1
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2545
   *                              </td>
2546
   *                              </tr>
2547 1
   *                              <tr valign="top">
2548
   *                              <td><b>ENT_DISALLOWED</b></td>
2549
   *                              <td>
2550
   *                              Replace invalid code points for the given document type with a
2551
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2552
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2553
   *                              instance, to ensure the well-formedness of XML documents with
2554
   *                              embedded external content.
2555
   *                              </td>
2556
   *                              </tr>
2557
   *                              <tr valign="top">
2558
   *                              <td><b>ENT_HTML401</b></td>
2559 6
   *                              <td>
2560
   *                              Handle code as HTML 4.01.
2561 6
   *                              </td>
2562 6
   *                              </tr>
2563 1
   *                              <tr valign="top">
2564
   *                              <td><b>ENT_XML1</b></td>
2565
   *                              <td>
2566 1
   *                              Handle code as XML 1.
2567 1
   *                              </td>
2568 6
   *                              </tr>
2569 1
   *                              <tr valign="top">
2570 1
   *                              <td><b>ENT_XHTML</b></td>
2571 1
   *                              <td>
2572 1
   *                              Handle code as XHTML.
2573 6
   *                              </td>
2574 6
   *                              </tr>
2575
   *                              <tr valign="top">
2576
   *                              <td><b>ENT_HTML5</b></td>
2577
   *                              <td>
2578 6
   *                              Handle code as HTML 5.
2579 6
   *                              </td>
2580 1
   *                              </tr>
2581 1
   *                              </table>
2582 6
   *                              </p>
2583
   * @param string $encoding      [optional] <p>
2584 6
   *                              Defines encoding used in conversion.
2585 4
   *                              </p>
2586 4
   *                              <p>
2587 4
   *                              For the purposes of this function, the encodings
2588
   *                              ISO-8859-1, ISO-8859-15,
2589
   *                              UTF-8, cp866,
2590
   *                              cp1251, cp1252, and
2591 6
   *                              KOI8-R are effectively equivalent, provided the
2592
   *                              <i>string</i> itself is valid for the encoding, as
2593
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2594
   *                              the same positions in all of these encodings.
2595
   *                              </p>
2596 6
   * @param bool   $double_encode [optional] <p>
2597 6
   *                              When <i>double_encode</i> is turned off PHP will not
2598 6
   *                              encode existing html entities, the default is to convert everything.
2599
   *                              </p>
2600 6
   *
2601
   * @return string The converted string.
2602
   * </p>
2603
   * <p>
2604
   * If the input <i>string</i> contains an invalid code unit
2605
   * sequence within the given <i>encoding</i> an empty string
2606
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2607
   * <b>ENT_SUBSTITUTE</b> flags are set.
2608
   */
2609
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Thin wrapper: all four arguments are forwarded unchanged to PHP's native htmlspecialchars().
    $converted = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2613
2614 11
  /**
   * Checks whether the "iconv" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean.
    return extension_loaded('iconv');
  }
2623 1
2624
  /**
   * Converts Integer to hexadecimal U+xxxx code point representation.
   *
   * The hexadecimal part is left-padded with zeros to at least four digits.
   *
   * @param    int    $int  The integer to be converted to hexadecimal code point.
   * @param    string $pfix Prefix put in front of the hexadecimal digits.
   *
   * @return   string The code point, or empty string if $int does not consist of decimal digits only.
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    if (!ctype_digit((string)$int)) {
      return '';
    }

    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
2644 11
2645 11
  /**
   * Checks whether the "intl" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean.
    return extension_loaded('intl');
  }
2654
2655 11
  /**
   * Alias of "UTF8::is_ascii()".
   *
   * @param string $str
   *
   * @return boolean The result of is_ascii() for $str.
   */
  public static function isAscii($str)
  {
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2666
2667 2
  /**
   * Alias of "UTF8::is_base64()".
   *
   * @param string $str
   *
   * @return bool The result of is_base64() for $str.
   */
  public static function isBase64($str)
  {
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2678
2679 2
  /**
   * Alias of "UTF8::is_bom()".
   *
   * @param string $utf8_chr
   *
   * @return boolean The result of is_bom() for $utf8_chr.
   */
  public static function isBom($utf8_chr)
  {
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2690
2691
  /**
   * Try to check if a string is a json-string...
   *
   * Only input that json_decode() turns into an object counts; a JSON array
   * or scalar therefore yields false.
   *
   * @param $str
   *
   * @return bool
   *
   * @deprecated
   */
  public static function isJson($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = json_decode($str);

    return is_object($decoded) && json_last_error() == JSON_ERROR_NONE;
  }
2718 1
2719 1
  /**
   * Alias of "UTF8::is_utf8()".
   *
   * @param string $str
   *
   * @return bool The result of is_utf8() for $str.
   */
  public static function isUtf8($str)
  {
    $isUtf8 = self::is_utf8($str);

    return $isUtf8;
  }
2730
2731
  /**
   * Checks if a string is 7 bit ASCII, i.e. contains no byte in the range 0x80-0xFF.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if it is ASCII<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    $highByteFound = preg_match('/[\x80-\xFF]/', $str);

    return !$highByteFound;
  }
2743
2744
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The check is a round trip: the input must decode in strict mode and
   * encode back to exactly the same string. Empty input is never base64.
   *
   * @param string $str
   *
   * @return bool Whether or not $str is base64 encoded
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
2765 1
2766 1
  /**
   * Check if the input is binary... (is look like a hack)
   *
   * The input counts as binary when it consists only of the characters "0"
   * and "1", or when it contains at least one NUL byte.
   *
   * @param string $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    // A string made up solely of "0"/"1" digits is treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // NOTE: a former third check, substr_count($input, '^ -~') / strlen($input) > 0.3,
    // was removed because it could never be true: the needle is a literal
    // four-byte string, so the ratio cannot exceed 0.25.
    // NOTE(review): it looks like a "share of non-printable bytes" heuristic
    // was intended - confirm before adding one, it would change results.

    return substr_count($input, "\x00") > 0;
  }
2790
2791
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to is_binary().
   * A file that cannot be opened or read is treated like an empty one.
   *
   * @param string $file
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false; handing that to
      // fread()/fclose() raises further errors instead of an \Exception.
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        fclose($fp);
        $fp = false;

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Only reached when an error handler converts warnings into exceptions.
      $block = '';

      // Do not leak the handle if reading failed after a successful open.
      if (is_resource($fp)) {
        fclose($fp);
      }
    }

    return self::is_binary($block);
  }
2810
2811 1
  /**
   * Checks if the given string is exactly "UTF8 - Byte Order Mark".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param    string $utf8_chr The input string.
   *
   * @return   bool True if the $utf8_chr is identical to the value of bom(), False otherwise.
   */
  public static function is_bom($utf8_chr)
  {
    $bom = self::bom();

    return $utf8_chr === $bom;
  }
2824
2825
  /**
   * Check if the string is UTF-16.
   *
   * Only input that is_binary() accepts is examined. For each byte order the
   * string is converted to UTF-8 and round-tripped; if the round trip is
   * stable, a score is counted from the count_chars() results. The byte
   * order with the higher score wins; equal scores mean "not UTF-16".
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // Score both byte orders with the same procedure, little endian first.
    $scores = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $score = 0;

      $decoded = mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($decoded !== false && strlen($decoded) > 1) {
        $reEncoded = mb_convert_encoding($decoded, $encoding, 'UTF-8');
        $roundTrip = mb_convert_encoding($reEncoded, 'UTF-8', $encoding);

        if ($roundTrip == $decoded) {
          $strChars = self::count_chars($str);
          foreach (self::count_chars($roundTrip) as $chr => $unused) {
            if (in_array($chr, $strChars, true) === true) {
              $score++;
            }
          }
        }
      }

      $scores[$encoding] = $score;
    }

    if ($scores['UTF-16BE'] == $scores['UTF-16LE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
2879
2880
  /**
   * Check if the string is UTF-32.
   *
   * Only strings that "UTF8::is_binary()" reports as binary are examined.
   * For each byte order the input is converted to UTF-8 and back; when that
   * round trip is stable, the characters of the decoded text are checked
   * against "UTF8::count_chars()" of the input and every hit scores one point.
   * The byte order with the higher score wins, a tie means "not UTF-32".
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // The keys are the return codes of this method.
    $byteOrders = array(
        1 => 'UTF-32LE',
        2 => 'UTF-32BE',
    );

    $scores = array();
    foreach ($byteOrders as $code => $encoding) {
      $scores[$code] = 0;

      $decoded = mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      $reEncoded = mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $roundTrip = mb_convert_encoding($reEncoded, 'UTF-8', $encoding);

      // Strict comparison: with "==" two numeric strings such as "1e3" and
      // "1000" would be considered equal.
      if ($roundTrip !== $decoded) {
        continue;
      }

      $strChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$code]++;
        }
      }
    }

    if ($scores[1] === $scores[2]) {
      return false;
    }

    return ($scores[1] > $scores[2]) ? 1 : 2;
  }
2934
2935
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * An empty string is considered valid. When PCRE supports the /u modifier the
   * check is delegated to PCRE, otherwise the string is validated octet by octet.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param    string $str The string to be checked.
   *
   * @return   bool True if the whole string is valid UTF-8, false otherwise.
   */
  public static function is_utf8($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if (self::pcre_utf8_support() === true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences.
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    // Fallback without PCRE UTF-8 support: validate octet by octet.
    $pending = 0;   // continuation octets still expected for the current sequence
    $codePoint = 0; // code point assembled so far
    $seqLength = 1; // total number of octets of the current sequence
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
      $octet = ord($str[$i]);

      if ($pending === 0) {
        // Expect either a US-ASCII character or the lead of a multi-octet sequence.
        if (0 === (0x80 & $octet)) {
          $seqLength = 1;
        } elseif (0xC0 === (0xE0 & $octet)) {
          // First octet of 2 octet sequence.
          $codePoint = ($octet & 0x1F) << 6;
          $pending = 1;
          $seqLength = 2;
        } elseif (0xE0 === (0xF0 & $octet)) {
          // First octet of 3 octet sequence.
          $codePoint = ($octet & 0x0F) << 12;
          $pending = 2;
          $seqLength = 3;
        } elseif (0xF0 === (0xF8 & $octet)) {
          // First octet of 4 octet sequence.
          $codePoint = ($octet & 0x07) << 18;
          $pending = 3;
          $seqLength = 4;
        } else {
          // Stray continuation octet, lead of a 5 or 6 octet sequence (always
          // illegal: either not the shortest form or outside 0-0x10FFFF),
          // or 0xFE / 0xFF.
          return false;
        }

        continue;
      }

      if (0x80 !== (0xC0 & $octet)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation: merge its six payload bits.
      $pending--;
      $codePoint |= ($octet & 0x3F) << ($pending * 6);

      if ($pending === 0) {
        // End of the sequence: check for illegal sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (($seqLength === 2) && ($codePoint < 0x0080)) ||
            (($seqLength === 3) && ($codePoint < 0x0800)) ||
            (($seqLength === 4) && ($codePoint < 0x10000)) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($codePoint & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($codePoint > 0x10FFFF)
        ) {
          return false;
        }

        $codePoint = 0;
        $seqLength = 1;
      }
    }

    // A string that ends in the middle of a multi-octet sequence is invalid.
    return $pending === 0;
  }
3068 1
3069
  /**
   * Decodes a JSON string.
   *
   * The input is passed through "UTF8::filter()" before it is handed to the
   * native json_decode().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    The JSON string being decoded.
   * @param bool   $assoc   [optional] When TRUE, returned objects will be converted into
   *                        associative arrays.
   * @param int    $depth   [optional] User specified recursion depth.
   * @param int    $options [optional] Bitmask of JSON decode options; only forwarded to the
   *                        native function when "Bootup::is_php('5.4')" is true.
   *
   * @return mixed The value returned by the native json_decode(): the decoded value in the
   *               appropriate PHP type, or NULL if the JSON cannot be decoded or is nested
   *               deeper than the recursion limit.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3117
3118 1
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through "UTF8::filter()" before it is handed to the
   * native json_encode().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   The value being encoded. Can be any type except a resource.
   * @param int   $options [optional] Bitmask of JSON_* encode options (JSON_HEX_QUOT,
   *                       JSON_HEX_TAG, JSON_HEX_AMP, JSON_HEX_APOS, JSON_NUMERIC_CHECK,
   *                       JSON_PRETTY_PRINT, JSON_UNESCAPED_SLASHES, JSON_FORCE_OBJECT,
   *                       JSON_UNESCAPED_UNICODE).
   * @param int   $depth   [optional] Maximum depth, must be greater than zero; only forwarded
   *                       to the native function when "Bootup::is_php('5.5')" is truthy.
   *
   * @return string A JSON encoded string on success or FALSE on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3166
3167 4
  /**
   * Lower-cases the first character of a string and leaves the rest untouched.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function lcfirst($str)
  {
    $first = self::strtolower(self::substr($str, 0, 1));
    $rest = self::substr($str, 1);

    return $first . $rest;
  }
3178 4
3179
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than the native "ltrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when left at the default (INF)
   *                         whitespace ("\s") is stripped, otherwise the value is turned into a
   *                         regex class via "UTF8::rxClass()".
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if (INF === $chars) {
      $class = '\s';
    } else {
      $class = self::rxClass($chars);
    }

    return preg_replace('/^' . $class . '+/u', '', $str);
  }
3201 4
3202 4
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is concatenated into a single string first.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point of all characters in $arg.
   */
  public static function max($arg)
  {
    $data = is_array($arg) ? implode($arg) : $arg;
    $codePoints = self::codepoints($data);

    return self::chr(max($codePoints));
  }
3217
3218 4
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest value in "UTF8::chr_size_list($str)", or 0 when that list is empty.
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    if (count($sizes) <= 0) {
      return 0;
    }

    return (int)max($sizes);
  }
3235
3236
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal
   * encoding is switched to UTF-8.
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    mb_internal_encoding('UTF-8');

    return true;
  }
3251
3252
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array argument is concatenated into a single string first.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the lowest code point of all characters in $arg.
   */
  public static function min($arg)
  {
    $data = is_array($arg) ? implode($arg) : $arg;
    $codePoints = self::codepoints($data);

    return self::chr(min($codePoints));
  }
3267 1
3268 1
  /**
   * Normalize the encoding-name input.
   *
   * The label is upper-cased and stripped of everything except letters,
   * digits and whitespace before it is looked up in a table of known
   * aliases. Unknown labels are returned exactly as they were passed in.
   *
   * @param string $encodingLabel e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string e.g.: ISO-8859-1, UTF-8, ISO-8859-5 etc.
   */
  public static function normalizeEncoding($encodingLabel)
  {
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'WINDOWS1251' => 'ISO-8859-5',
    );

    return isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : $encodingLabel;
  }
3303
3304
  /**
   * Normalize MS Word special characters.
   *
   * Every key of the "self::$utf8MSWord" table found in the string is
   * replaced by the corresponding value. The search / replace lists are
   * built once and cached for subsequent calls.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    static $search = null;
    static $replacements = null;

    if ($search === null) {
      $table = self::$utf8MSWord;

      $search = array_keys($table);
      $replacements = array_values($table);
    }

    return str_replace($search, $replacements, $str);
  }
3323
3324
  /**
   * Normalize the whitespace.
   *
   * Every entry of the "self::$whitespaceTable" table found in the string is
   * replaced by a plain space. The replacement lists are cached per mode.
   *
   * @param string $str                  The string to be normalized.
   * @param bool   $keepNonBreakingSpace Set to (boolean) true, to keep non-breaking-spaces.
   *                                     Any other value is handled like false.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false)
  {
    static $whitespaces = array();

    // The cache slot is derived from the same strict test that decides whether
    // the NBSP is removed from the table. Otherwise a call with a truthy
    // non-boolean (e.g. 1) would fill the "keep" slot with the full table and
    // break later calls that pass true.
    $keep = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keep;

    if (!isset($whitespaces[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keep) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3350
3351
  /**
   * Format a number with grouped thousands.
   *
   * The native number_format() accepts multi-byte separators since PHP 5.4
   * only. On older versions multi-byte separators are emulated: the number is
   * formatted with "." and "," and both are then swapped for the requested
   * separators in one pass.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $dec_point = (string)$dec_point;
    $thousands_sep = (string)$thousands_sep;

    $needsEmulation = (isset($thousands_sep[1]) || isset($dec_point[1]))
                      &&
                      Bootup::is_php('5.4') !== true;

    if ($needsEmulation) {
      // strtr() with an array replaces both placeholders at once and never
      // re-scans its own output, so a "," inside $dec_point is not turned
      // into $thousands_sep (sequential str_replace() did exactly that).
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3381 3
3382
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * Only the first character (at most four bytes) of the input is looked at.
   * The bytes are not validated: the sequence length is derived from the lead
   * byte and from how many bytes are available.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for empty input.
   */
  public static function ord($s)
  {
    // "0" is falsy in PHP but it is a real character (U+0030).
    if (!$s && $s !== '0') {
      return 0;
    }

    // 1-based list of the byte values of (up to) the first four bytes
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    if (0xE0 <= $lead && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    if (0xC0 <= $lead && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    return $lead;
  }
3413 1
3414
  /**
   * Parses the string into variables.
   *
   * WARNING: Unlike a one-argument call of the native parse_str(), nothing is
   *    placed in the local scope; the parsed variables are written to $result.
   *
   * The input is passed through "UTF8::filter()" and then parsed by mb_parse_str().
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     The input string.
   * @param array  $result  Receives the parsed variables as array elements (by reference).
   *
   * @return void
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    mb_parse_str(self::filter($str), $result);
  }
3441
3442
  /**
   * Checks if the /u modifier, which enables Unicode support in PCRE, is available.
   *
   * An empty pattern with the /u modifier is matched against an empty string;
   * warnings of that probe are suppressed.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return $probe === 1;
  }
3452
3453
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: a string of decimal digits is
   * taken as the number itself, a string of hexadecimal digits goes through
   * "UTF8::hex_to_int()", anything else through "UTF8::ord()". An empty array
   * is returned when a boundary is falsy or resolves to a falsy code point.
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters of all code points between both boundaries.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // Resolve the start first, then the end; stop at the first unusable one.
    $codePoints = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $codePoints[] = $codePoint;
    }

    $toChr = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChr, range($codePoints[0], $codePoints[1]));
  }
3499
3500 1
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * The marks are tested in a fixed order (UTF-32 before UTF-16, because the
   * UTF-32LE mark starts with the UTF-16LE mark). Every mark found at the
   * start of the string is cut off before the next one is tested.
   *
   * @param string $str
   *
   * @return string The input without the leading byte order mark(s).
   */
  public static function removeBOM($str = '')
  {
    $byteOrderMarks = array(
        "\x00\x00\xfe\xff", // UTF-32 (BE)
        "\xff\xfe\x00\x00", // UTF-32 (LE)
        "\xef\xbb\xbf",     // UTF-8
        "\xfe\xff",         // UTF-16 (BE)
        "\xff\xfe",         // UTF-16 (LE)
    );

    foreach ($byteOrderMarks as $mark) {
      $markLength = strlen($mark);

      /** @noinspection SubStrUsedAsStrPosInspection */
      if (substr($str, 0, $markLength) == $mark) {
        $str = substr($str, $markLength);
      }
    }

    return $str;
  }
3547
3548
  /**
   * Collapses consecutive repetitions of a string inside another string.
   *
   * Every run of back-to-back occurrences of a needle is reduced to one single occurrence,
   * e.g. "a   b" becomes "a b" with the default needle. Multiple needles are processed one
   * after another, in the order given.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String (or list of strings) whose repetitions are collapsed;
   *                              any other type is ignored and $str is returned unchanged
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (!is_array($what)) {
      return $str;
    }

    foreach ($what as $item) {
      $pattern = '/(?:' . preg_quote($item, '/') . ')+/';

      // "\" and "$" are escaped so that the needle is inserted literally and never parsed
      // as a back-reference (a needle such as "\0" or "$10" would otherwise be mangled)
      $str = preg_replace($pattern, addcslashes($item, '\\$'), $str);
    }

    return $str;
  }
3570
3571
  /**
   * Remove invisible characters.
   *
   * This prevents sandwiching null characters between ascii characters, like Java\0script.
   * Removal is repeated until nothing matches any more, so sequences that only become
   * "invisible" after an earlier removal are stripped as well.
   *
   * Adapted from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also strip the percent-encoded forms (e.g. "%00", "%0B", "%7F").
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of a percent-encoding are case-insensitive ("%0B" equals "%0b"),
      // therefore the patterns carry the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3604
3605 6
  /**
   * Replaces the Unicode replacement character (the "diamond question mark", U+FFFD).
   *
   * @param string $str     The input string
   * @param string $unknown The text that is inserted for every replacement character
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the character is listed once as escaped bytes and once as a literal
    $needles = array("\xEF\xBF\xBD", '�');
    $substitutes = array($unknown, $unknown);

    return str_replace($needles, $substitutes, $str);
  }
3627 1
3628 1
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * WARNING: This is much slower than "rtrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted, the regex
   *                         class "\s" (whitespace) is stripped
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $chars = INF === $chars ? '\s' : self::rxClass($chars);

    // "D" (PCRE_DOLLAR_ENDONLY): without it "$" also matches right before a final newline,
    // so e.g. trimming "-" from "abc-\n" would strip a character that is not at the end
    return preg_replace("/{$chars}+$/uD", '', $str);
  }
3650 1
3651
  /**
   * Builds a regex fragment that matches any one of the characters contained in $s.
   *
   * Single characters are collected in one bracket expression; pieces returned by
   * self::str_split() that consist of more than one character are added as
   * alternatives next to it. Results are cached per ($class, $s) pair.
   *
   * @param string $s     The characters to match
   * @param string $class Regex text that is placed in the bracket expression as-is (e.g. '\pL')
   *
   * @return string A bracket expression, or a non-capturing group of alternatives
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    // two-level cache: a concatenated key ($s . $class) would mix up e.g.
    // ('\pL', '') and ('', '\pL'), which produce different expressions
    if (isset($rxClassCache[$class][$s])) {
      return $rxClassCache[$class][$s];
    }

    $charset = $class;
    $alternatives = array();

    foreach (self::str_split($s) as $chunk) {
      if ('-' === $chunk) {
        // a leading hyphen is literal inside a bracket expression
        $charset = '-' . $charset;
      } elseif (!isset($chunk[2])) {
        // at most two bytes: may be an ASCII meta character, so quote it
        $charset .= preg_quote($chunk, '/');
      } elseif (1 === self::strlen($chunk)) {
        // one multi-byte character: nothing to quote
        $charset .= $chunk;
      } else {
        // several characters cannot live in a bracket expression; they may contain
        // ASCII meta characters (e.g. "." plus a combining mark), so quote them too
        $alternatives[] = preg_quote($chunk, '/');
      }
    }

    $charset = '[' . $charset . ']';

    if (0 === count($alternatives)) {
      $return = $charset;
    } else {
      array_unshift($alternatives, $charset);
      $return = '(?:' . implode('|', $alternatives) . ')';
    }

    $rxClassCache[$class][$s] = $return;

    return $return;
  }
3695
3696
  /**
   * Prints every entry of the native UTF8-support list, e.g. for debugging.
   *
   * Each entry is followed by a newline and a "<br>" tag.
   */
  public static function showSupport()
  {
    $output = '';

    foreach (self::$support as $entry) {
      $output .= $entry . "\n<br>";
    }

    echo $output;
  }
3705
3706
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   *
   * @return   string The HTML numbered entity, or an empty string for empty input.
   */
  public static function single_chr_html_encode($chr)
  {
    $chr = (string)$chr;

    // test for emptiness by length: a plain truthiness check would also reject "0"
    if (!isset($chr[0])) {
      return '';
    }

    return '&#' . self::ord($chr) . ';';
  }
3721
3722
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split by a regex; otherwise the bytes are
   * walked manually and incomplete or malformed sequences are dropped.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied on the PCRE path).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ('' === $str) {
      return array();
    }

    self::checkForSupport();

    $chars = array();

    if (self::$support['pcre_utf8'] !== true) {

      // fallback: decode the sequences byte by byte
      $size = strlen($str);
      $pos = 0;

      while ($pos < $size) {
        $lead = $str[$pos];

        // width announced by the lead byte (0 = not a valid lead byte)
        $width = 0;
        if (($lead & "\x80") === "\x00") {
          $width = 1;
        } elseif (($lead & "\xE0") === "\xC0") {
          $width = 2;
        } elseif (($lead & "\xF0") === "\xE0") {
          $width = 3;
        } elseif (($lead & "\xF8") === "\xF0") {
          $width = 4;
        }

        // the sequence must be complete and consist of continuation bytes only
        $valid = $width > 0 && isset($str[$pos + $width - 1]);
        for ($k = 1; $valid && $k < $width; $k++) {
          $valid = ($str[$pos + $k] & "\xC0") === "\x80";
        }

        if ($valid) {
          $chars[] = substr($str, $pos, $width);
          $pos += $width;
        } else {
          // skip the offending byte only
          $pos++;
        }
      }

    } else {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    }

    if ($length > 1) {
      // glue $length characters together per element
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
3800 1
3801 1
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection order:
   * 1. a byte order mark at the start of the string,
   * 2. for strings classified as binary by self::is_binary(): self::is_utf16() / self::is_utf32(),
   * 3. the result of mb_detect_encoding() in strict mode.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    // The 4-byte UTF-32 marks must be tested before the 2-byte UTF-16 marks:
    // the UTF-32LE mark (FF FE 00 00) starts with the UTF-16LE mark (FF FE).
    $boms = array(
        "\xEF\xBB\xBF"     => 'UTF-8',
        "\x00\x00\xFE\xFF" => 'UTF-32BE',
        "\xFF\xFE\x00\x00" => 'UTF-32LE',
        "\xFE\xFF"         => 'UTF-16BE',
        "\xFF\xFE"         => 'UTF-16LE',
    );

    $bytes = (string)$str;
    foreach ($boms as $bom => $bomEncoding) {
      if (strncmp($bytes, $bom, strlen($bom)) === 0) {
        return $bomEncoding;
      }
    }

    self::checkForSupport();

    // For UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always.
    $detectOrder = array(
        'UTF-8',
        'windows-1251',
        'windows-1252',
        'ISO-8859-1',
    );
    $encoding = mb_detect_encoding($str, $detectOrder, true);

    if (self::is_binary($str)) {
      // 1 is mapped to little endian, 2 to big endian (loose comparison kept on purpose)
      $utf16 = self::is_utf16($str);
      if ($utf16 == 1) {
        return 'UTF-16LE';
      }
      if ($utf16 == 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 == 1) {
        return 'UTF-32LE';
      }
      if ($utf32 == 2) {
        return 'UTF-32BE';
      }
    }

    if (!$encoding) {
      return false;
    }

    return $encoding;
  }
3880 1
3881
  /**
   * Case-insensitive, UTF-8 aware version of "str_ireplace()".
   *
   * Every search value is turned into a quoted, case-insensitive UTF-8 regex;
   * an empty search value never matches.
   *
   * @param string|array $search  The value(s) being searched for
   * @param string|array $replace The replacement value(s), inserted literally
   * @param string|array $subject The string(s) being searched and replaced on
   * @param null|int     $count   [optional] If passed, this will hold the number of replacements
   *
   * @return string|array The subject with the replacements applied
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // a pattern that can never match
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // "\" and "$" are escaped so that the replacement text is taken literally;
    // preg_replace() would otherwise interpret "$0", "\1", ... as back-references
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $value) {
        $replacement[] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    return preg_replace($patterns, $replacement, $subject, -1, $count);
  }
3909 2
3910 2
  /**
   * Shortens a string to at most $length characters, cutting at a space where possible,
   * and appends $strAddOn to the shortened text.
   *
   * Strings that already fit into $length are returned unchanged.
   *
   * @param  string $str
   * @param  int    $length
   * @param  string $strAddOn
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // short enough already
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls directly behind a word
    if (' ' === self::substr($str, $length - 1, 1)) {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $str = self::substr($str, 0, $length);

    // drop the (possibly cut) last word
    $words = explode(' ', $str);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord == '') {
      // no space at all: cut inside the word
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
3948
3949
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when $pad_length is not an integer, is not positive,
   * is smaller than the input length, or when $pad_string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $input_length) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // nothing to pad with; also protects the divisions below from a zero divisor
    if ($ps_length < 1) {
      return $input;
    }

    $diff = $pad_length - $input_length;

    // number of padding characters on each side
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $left = $diff;
        $right = 0;
        break;

      case STR_PAD_BOTH:
        // an odd remainder goes to the right side
        $left = (int)floor($diff / 2);
        $right = $diff - $left;
        break;

      case STR_PAD_RIGHT:
      default:
        $left = 0;
        $right = $diff;
    }

    $pre = '';
    if ($left > 0) {
      $pre = str_repeat($pad_string, (int)ceil($left / $ps_length));
      $pre = self::substr($pre, 0, $left);
    }

    $post = '';
    if ($right > 0) {
      $post = str_repeat($pad_string, (int)ceil($right / $ps_length));
      $post = self::substr($post, 0, $right);
    }

    return $pre . $input . $post;
  }
3994
3995
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return str_repeat(self::filter($input), $multiplier);
  }
4019
4020
  /**
   * INFO: this is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation to the native function
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4054
4055
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split into characters with self::split(), so multi-byte
   * characters are moved as a whole.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
4070
4071 7
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice leaves one entry per distinct value
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4096
4097
  /**
   * Convert a string to an array of pieces, each holding up to $len grapheme clusters.
   *
   * Clusters are extracted with the intl extension when it is available, otherwise with
   * the grapheme-cluster regex of the Grapheme class. A length below 1 is handed over
   * to the native "str_split()".
   *
   * @param string $str
   * @param int    $len
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();

    $len = (int)$len;
    if ($len < 1) {
      $len = func_get_arg(1);

      return str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $clusters = array();
      $offset = 0;
      $byteLength = strlen($str);
      while ($offset < $byteLength) {
        // the last argument receives the offset of the next cluster
        $clusters[] = grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $offset, $offset);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $found);
      $clusters = $found[0];
    }

    if (1 == $len) {
      return $clusters;
    }

    // glue $len clusters together per output element
    $pieces = array();
    $current = -1;

    foreach ($clusters as $index => $cluster) {
      if ($index % $len) {
        $pieces[$current] .= $cluster;
      } else {
        $current++;
        $pieces[$current] = $cluster;
      }
    }

    return $pieces;
  }
4146
4147
  /**
   * Get a binary representation of a string.
   *
   * Every byte of the input is passed to self::ord() and the result is formatted
   * as a zero-padded binary number of at least 8 digits.
   *
   * @param   string $str The input character.
   *
   * @return  string
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $bits = '';
    $index = 0;

    // walk the string byte by byte
    while (isset($str[$index])) {
      $bits .= vsprintf('%08b', (array)self::ord($str[$index]));
      ++$index;
    }

    return $bits;
  }
4173
4174
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   *
   * The input is cleaned with self::clean(), split into characters and every non-ASCII
   * character is looked up in a table that is loaded on demand from
   * "data/x<bank>.php" (bank = code point >> 8). Characters without an entry are
   * replaced by $unknown.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Sequence length and payload bits announced by the lead byte. Both values are
      // determined anew for every character, so nothing decoded for a previous
      // character can leak into this one.
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $size = 2;
        $ord = $ordC0 - 192;
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $size = 3;
        $ord = $ordC0 - 224;
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $size = 4;
        $ord = $ordC0 - 240;
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $size = 5;
        $ord = $ordC0 - 248;
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $size = 6;
        $ord = $ordC0 - 252;
      } else {
        // 128-191 (stray continuation byte) and 254-255 are no lead bytes
        $c = $unknown;
        continue;
      }

      // truncated sequence
      if (!isset($c[$size - 1])) {
        $c = $unknown;
        continue;
      }

      // every continuation byte contributes six bits
      for ($i = 1; $i < $size; ++$i) {
        $ord = $ord * 64 + (ord($c[$i]) - 128);
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // the bank file is expected to fill $UTF8_TO_ASCII[$bank]
          /** @noinspection PhpIncludeInspection */
          include $bankfile;
        }

        // missing file, or a file that did not define the bank
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference held by the loop variable
    unset($c);

    return implode('', $chars);
  }
4284
4285
  /**
   * Counts the words in a UTF-8 string.
   *
   * The string is split with a Unicode-aware regex: a word is a run of characters matched by the
   * class built via self::rxClass($charlist, '\pL'), optionally continued after a dash punctuation
   * character (\p{Pd}), a typographic apostrophe or a plain apostrophe.
   *
   * @param string $s        The input string.
   * @param int    $format   0 (default): return the number of words,
   *                         1: return a list of the words,
   *                         2: return the words keyed by their offset, measured with self::strlen().
   * @param string $charlist Extra characters handed to self::rxClass() together with \pL.
   *
   * @return array|int The number of words (format 0) or an array of words (format 1 and 2).
   */
  public static function str_word_count($s, $format = 0, $charlist = '')
  {
    $wordClass = self::rxClass($charlist, '\pL');
    $parts = preg_split("/({$wordClass}+(?:[\p{Pd}’']{$wordClass}+)*)/u", $s, -1, PREG_SPLIT_DELIM_CAPTURE);

    // preg_split() returns false on failure (e.g. malformed UTF-8 with the "u" modifier);
    // report "no words" instead of feeding false into count() and string offsets.
    if ($parts === false) {
      return (1 == $format || 2 == $format) ? array() : 0;
    }

    // With PREG_SPLIT_DELIM_CAPTURE the words sit on the odd indexes, the text between them on the even ones.
    $count = count($parts);

    if (1 == $format) {
      $words = array();
      for ($i = 1; $i < $count; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if (2 == $format) {
      self::checkForSupport();

      $words = array();
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $count; $i += 2) {
        $words[$offset] = $parts[$i];
        // skip the word itself plus the separator text that follows it
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return (int)(($count - 1) / 2);
  }
4319
4320
  /**
   * Compares two strings without regard to case.
   *
   * Both operands are run through self::strtocasefold() and the folded results are handed to self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4332
4333
  /**
   * String comparison.
   *
   * Byte-identical strings are reported as equal right away. Otherwise both strings are normalized
   * to NFD and compared with PHP's strcmp(). If a string cannot be normalized, its raw value is
   * compared instead, so that two different non-normalizable strings are not reported as equal.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2,
   *             > 0 if str1 is greater than str2,
   *             0 if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    $str1 = (string)$str1;
    $str2 = (string)$str2;

    if ($str1 === $str2) {
      return 0;
    }

    $nfd1 = Normalizer::normalize($str1, Normalizer::NFD);
    $nfd2 = Normalizer::normalize($str2, Normalizer::NFD);

    // Normalizer::normalize() does not return a string when it fails: keep the original bytes then.
    return strcmp(
        is_string($nfd1) ? $nfd1 : $str1,
        is_string($nfd2) ? $nfd2 : $str2
    );
  }
4350
4351
  /**
   * Finds the length of the initial segment that contains none of the characters in $charlist.
   *
   * @param string $str      The string to examine.
   * @param string $charlist The characters that end the segment; an empty list yields null.
   * @param int    $start    Start of the examined part (applied through self::substr()).
   * @param int    $len      Length of the examined part (applied through self::substr()).
   *
   * @return int|null Segment length as measured by self::strlen(), or null for an empty $charlist.
   */
  public static function strcspn($str, $charlist, $start = 0, $len = 2147483647)
  {
    $charlist = (string)$charlist;
    if ($charlist === '') {
      return null;
    }

    // only cut the string when the caller actually restricted the range
    $isWholeString = !$start && 2147483647 == $len;
    $subject = $isWholeString ? (string)$str : (string)self::substr($str, $start, $len);

    // lazily capture everything in front of the first character from the list
    $pattern = '/^(.*?)' . self::rxClass($charlist) . '/us';
    $found = preg_match($pattern, $subject, $match);

    return self::strlen($found ? $match[1] : $subject);
  }
4380
4381
  /**
   * Builds a string from a list of code points.
   *
   * Every element is converted with the class' chr() method and the results are concatenated.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );
    $chars = array_map($toChar, $array);

    return implode($chars);
  }
4400
4401
  /**
   * Checks if the string starts with the "UTF-8 BOM" character.
   *
   * The first three bytes of the string are handed to self::is_bom().
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $head = substr($str, 0, 3);

    return self::is_bom($head);
  }
4412
4413
  /**
   * Strips HTML and PHP tags from a string.
   *
   * The input is passed through self::clean() first and then handed to PHP's strip_tags().
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            The input string.
   * @param string $allowable_tags [optional] Tags which should not be stripped.
   *                               HTML comments and PHP tags are always stripped; this is hardcoded
   *                               in strip_tags() and can not be changed with allowable_tags.
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 before the tags are removed
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
4439
4440
  /**
   * Finds the position of the first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  The string from which to get the position of the first occurrence of needle.
   * @param string  $needle    The string to find in haystack.
   * @param int     $offset    [optional] The position in haystack to start searching;
   *                           null (the default) is treated as 0.
   * @param string  $encoding  Charset handed to mb_stripos(); a boolean is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the strings before searching.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string,
   *                   or false if needle is not found or one of the strings is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    // mb_stripos() expects an integer offset: cast the null default explicitly,
    // the same way strpos() and strrpos() of this class already do.
    return mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4487
4488
  /**
   * Case-insensitive search for the first occurrence of needle (wrapper around mb_stristr() with 'UTF-8').
   *
   * @param string $str           The string to search in.
   * @param string $needle        The string to search for; an empty needle yields false.
   * @param bool   $before_needle Passed through to mb_stristr() as its "before needle" flag.
   *
   * @return false|string
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = (string)$needle;
    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    $found = mb_stristr($str, $needle, $before_needle, 'UTF-8');

    return $found;
  }
4508
4509
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Charset handed to mb_strlen(); a boolean is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied for 'UTF-8').
   *
   * @return int The number of characters in $str for the given encoding;
   *             a multi-byte character is counted as 1, an empty string gives 0.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    $mustClean = ($encoding === 'UTF-8' && $cleanUtf8 === true);

    return mb_strlen($mustClean ? self::clean($str) : $str, $encoding);
  }
4545
4546
  /**
   * Case insensitive string comparison using a "natural order" algorithm.
   *
   * Both operands are run through self::strtocasefold() and then compared with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4559
4560
  /**
   * String comparison using a "natural order" algorithm.
   *
   * Identical strings short-circuit to 0; otherwise both operands are passed through
   * self::strtonatfold() and compared with PHP's strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
4573
4574
  /**
   * Case-insensitive comparison of the first n characters.
   *
   * Both operands are run through self::strtocasefold() and then compared with self::strncmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len  Number of characters to compare.
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4587
4588
  /**
   * Comparison of the first n characters.
   *
   * Both strings are cut to their first $len characters with self::substr() and the two
   * prefixes are compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len  Number of characters to compare.
   *
   * @return int < 0 if str1 is less than str2,
   *             > 0 if str1 is greater than str2,
   *             0 if they are equal.
   */
  public static function strncmp($str1, $str2, $len)
  {
    $prefix1 = self::substr($str1, 0, $len);
    $prefix2 = self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
4603
4604
  /**
   * Searches a string for any of a set of characters.
   *
   * @param string $s        The string to search in.
   * @param string $charList The characters to look for (turned into a regex class by self::rxClass()).
   *
   * @return string|false The rest of $s starting at the first matched character, or false if nothing matched.
   */
  public static function strpbrk($s, $charList)
  {
    $pattern = '/' . self::rxClass($charList) . '/us';

    if (!preg_match($pattern, $s, $hit)) {
      return false;
    }

    // locate the matched character again (byte position) and return everything from there on
    $bytePos = strpos($s, $hit[0]);

    return substr($s, $bytePos);
  }
4620
4621
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack  The string being checked.
   * @param string  $needle    The string to find in haystack. It is always cast to string, so an
   *                           integer needle is searched as its decimal representation.
   * @param int     $offset    [optional] The search offset. If it is not specified, 0 is used.
   * @param string  $encoding  Charset handed to mb_strpos(); a boolean is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the strings before searching.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string,
   *                   or false if needle is not found or one of the strings is empty.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    // The former "integer needle => self::chr()" branch was removed: $needle has already been
    // cast to string above, so "((int)$needle) === $needle" could never be true.

    if ($cleanUtf8 === true) {
      // mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): the 'iconv' flag guards a grapheme_*() call here - confirm this is intended.
    if (self::$support['iconv'] === true) {
      return grapheme_strpos($haystack, $needle, $offset);
    }

    // fallback: cut the skipped part off, search byte-wise and convert the byte position
    // back into a character position

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
4695
4696
  /**
   * Finds the last occurrence of a character in a string within another (wrapper around mb_strrchr()).
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack from the beginning to the
   *                         last occurrence of needle. If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name to use; 'UTF-8' by default.
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
4730
4731
  /**
   * Reverses the order of the characters in the string.
   *
   * The string is broken up with self::split(), the resulting list is reversed and glued together again.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode($reversed);
  }
4742
4743
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive
   * (wrapper around mb_strrichr()).
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack from the beginning to the
   *                         last occurrence of needle. If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name to use; 'UTF-8' by default.
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
4777
4778
  /**
   * Finds the position of the last occurrence of a case-insensitive string.
   *
   * Haystack and needle are lower-cased with self::strtolower() and then searched with self::strrpos().
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int The position of offset
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
4791
4792
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string  $haystack  The string being checked, for the last occurrence of needle.
   * @param string  $needle    The string to find in haystack. It is always cast to string, so an
   *                           integer needle is searched as its decimal representation.
   * @param int     $offset    [optional] May be specified to begin searching an arbitrary number of
   *                           characters into the string. Negative values will stop searching at an
   *                           arbitrary point prior to the end of the string.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the strings before searching.
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack string,
   *                   or false if needle is not found or one of the strings is empty.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // The former "integer needle => self::chr()" branch was removed: $needle has already been
    // cast to string above, so "((int)$needle) === $needle" could never be true.

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): the 'iconv' flag guards a grapheme_*() call here - confirm this is intended.
    if (self::$support['iconv'] === true) {
      return grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback

    if ($offset > 0) {
      // skip the first $offset characters
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // drop the last |$offset| characters
      $haystack = self::substr($haystack, 0, $offset);
    }

    if (($pos = strrpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // convert the byte position into a character position and re-add the skipped prefix;
      // a negative offset only shortened the end, so nothing has to be added for it
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
4865
4866
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters
   * contained within a given mask.
   *
   * @param string $s     The string to examine.
   * @param string $mask  The allowed characters (turned into a regex class by self::rxClass()).
   * @param int    $start Start of the examined part (applied through self::substr()).
   * @param int    $len   Length of the examined part (applied through self::substr()).
   *
   * @return int|null Length of the matching segment as measured by self::strlen(), 0 if nothing matches.
   */
  public static function strspn($s, $mask, $start = 0, $len = 2147483647)
  {
    $subject = $s;

    // only cut the string when the caller actually restricted the range
    if ($start || 2147483647 != $len) {
      $subject = self::substr($s, $start, $len);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (preg_match($pattern, $subject, $match)) {
      return self::strlen($match[0]);
    }

    return 0;
  }
4885
4886
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack
   * (wrapper around grapheme_strstr()).
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      The input string. Must be valid UTF-8.
   * @param string $needle        The string to look for. Must be valid UTF-8.
   * @param bool   $before_needle [optional] If TRUE, grapheme_strstr() returns the part of the
   *                              haystack before the first occurrence of the needle (excluding the needle).
   *
   * @return string|false The portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
4910
4911
  /**
   * Unicode transformation for case-less matching.
   *
   * Steps: replace the entries of self::$commonCaseFold, optionally replace the entries of the
   * 'caseFolding_full' data set, run self::clean() and finally self::strtolower().
   * The lookup lists are built once and kept in static variables.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str  The string to fold.
   * @param bool   $full Also apply the 'caseFolding_full' replacements (default: true).
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    static $commonFrom = null;
    static $commonTo = null;
    static $fullTable = null;

    if (null === $commonFrom) {
      $commonFrom = array_keys(self::$commonCaseFold);
      $commonTo = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonFrom, $commonTo, $str);

    if ($full) {

      // load the full folding data on first use only
      if (null === $fullTable) {
        $fullTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    return self::strtolower(self::clean($folded));
  }
4948
4949
  /**
   * Make a string lowercase (wrapper around mb_strtolower()).
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      The string being lowercased.
   * @param string $encoding Charset handed to mb_strtolower(); 'UTF-8' by default.
   *
   * @return string str with all alphabetic characters converted to lowercase; '' for an empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $lowered = mb_strtolower($str, $encoding);

    return $lowered;
  }
4975
4976
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed.
   *
   * @param string $s
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = Normalizer::normalize($s, Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
4987
4988
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when self::$support['mbstring'] is true; otherwise the string is cleaned
   * with self::clean() and converted via str_replace() with the keys/values of self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      The string being uppercased.
   * @param string $encoding Charset handed to mb_strtoupper(); 'UTF-8' by default.
   *
   * @return string str with all alphabetic characters converted to uppercase; '' for an empty input.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // lookup lists for the fallback, built on first use
    static $caseFrom = null;
    static $caseTo = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return mb_strtoupper($str, $encoding);
    }

    // fallback

    if (null === $caseFrom) {
      $table = self::case_table();
      $caseFrom = array_keys($table);
      $caseTo = array_values($table);
    }

    return str_replace($caseFrom, $caseTo, self::clean($str));
  }
5031
5032
  /**
   * Translate characters or replace sub-strings.
   *
   * With three arguments, $from and $to are split with self::str_split(), the longer list is cut
   * to the length of the shorter one and the resulting pairs are handed to PHP's strtr().
   * With two arguments, $from is passed to PHP's strtr() unchanged.
   *
   * @param string $s    The string being translated.
   * @param string $from The characters to replace (or the replace pairs when $to is omitted).
   * @param string $to   The replacement characters; INF marks "not given".
   *
   * @return string
   */
  public static function strtr($s, $from, $to = INF)
  {
    // two-argument form: nothing to prepare
    if (INF === $to) {
      return strtr($s, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);
    $fromCount = count($fromChars);
    $toCount = count($toChars);

    // extra characters in the longer of the two lists are ignored
    if ($fromCount > $toCount) {
      $fromChars = array_slice($fromChars, 0, $toCount);
    } elseif ($fromCount < $toCount) {
      $toChars = array_slice($toChars, 0, $fromCount);
    }

    return strtr($s, array_combine($fromChars, $toChars));
  }
5060
5061
  /**
   * Return the width of a string (wrapper around mb_strwidth() with 'UTF-8').
   *
   * @param string $s
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = mb_strwidth($s, 'UTF-8');

    return $width;
  }
5075
5076
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       The string being checked.
   * @param int     $start     The first position used in str.
   * @param int     $length    [optional] The maximum length of the returned string;
   *                           null means self::strlen($str).
   * @param string  $encoding  Charset handed to mb_substr(); a boolean is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string The portion of str specified by the start and length parameters;
   *                '' for an empty input.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (true === $cleanUtf8) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr

      $str = self::clean($str);
    }

    $length = (null === $length) ? (int)self::strlen($str) : (int)$length;

    if (true === self::$support['mbstring']) {

      // INFO: this is only a fallback for old versions
      $charset = is_bool($encoding) ? 'UTF-8' : $encoding;

      return mb_substr($str, $start, $length, $charset);
    }

    if (true === self::$support['iconv']) {
      return (string)grapheme_substr($str, $start, $length);
    }

    // fallback: split into characters, take the requested slice and join it to a string again
    $chars = self::split($str);

    return implode(array_slice($chars, $start, $length));
  }
5142
5143
  /**
   * Comparison of two strings from an offset, up to length characters.
   *
   * $main_str is cut with self::substr($main_str, $offset, $length); $str is cut to the character
   * length of that part. The two parts are compared with self::strcasecmp() or self::strcmp().
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
   *                                    end of the string.
   * @param int     $length             The length of the comparison.
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5163
5164
  /**
   * Count the number of sub-string occurrences.
   *
   * When an offset or a length is given, the haystack is first cut with self::substr() and the
   * occurrences are then counted inside that part only.
   *
   * @param    string   $haystack The string to search in.
   * @param    string   $needle   The string to search for.
   * @param    int      $offset   The offset where to start counting.
   * @param    int|null $length   The maximum length after the specified offset to search for the substring;
   *                              null (the default) means "up to the end of the string".
   *
   * @return   int number of occurrences of $needle
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $offset = (int)$offset;

    if ($offset !== 0 || $length !== null) {
      // Keep a missing length as null so that self::substr() reads up to the end of the string;
      // casting it to 0 would cut the haystack down to an empty string.
      $haystack = self::substr($haystack, $offset, $length === null ? null : (int)$length);
    }

    // Offset and length are already applied above - they must not be applied a second time
    // (byte-wise) by the native function.
    return substr_count($haystack, $needle);
  }
5190
5191
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * When $str is an array, the replacement is applied to every element; scalar
   * $replacement / $start / $length values are repeated for each element, array values
   * are matched to the elements by position.
   *
   * NOTE: for array input without a $length, a length of 0 is used for every element
   * (i.e. the replacement is inserted), whereas scalar input without a $length replaces
   * everything from $start to the end of the string.
   *
   * @param string|array   $str
   * @param string|array   $replacement
   * @param int|array      $start
   * @param null|int|array $length
   *
   * @return array|string
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement: one entry per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: anything that is not a real integer falls back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        // break the reference, otherwise a later write to $valueTmp would change $start
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, once per element
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // A single string can only take a single replacement: use the first one.
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // Split both strings into characters ("s" modifier: line feeds count as characters, too).
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // default: replace everything up to the end of the string
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
5269
5270 6
  /**
   * Invert the case of every character: upper-case characters become lower-case and vice versa.
   *
   * @param string $str      The input string.
   * @param string $encoding The encoding handed to the case-conversion helpers.
   *
   * @return string The string with each character's case swapped.
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Called once per non-whitespace character.
    $swapper = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // Unchanged by upper-casing => it already is upper-case (or has no case):
      // hand back the lower-case form instead.
      return $char == $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapper, self::clean($str));
  }
5304 1
5305
  /**
   * Alias of "UTF8::to_ascii()".
   *
   * @param string $s         The input string, e.g. a UTF-8 string.
   * @param string $subst_chr Passed through unchanged to "UTF8::to_ascii()".
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
5317
5318
  /**
   * Alias of "UTF8::to_latin1()".
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
5329
5330
  /**
   * Alias of "UTF8::to_utf8()".
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
5341
5342
  /**
   * Convert a string to ASCII.
   *
   * Every multi-byte character is transliterated on its own, trying in this order:
   * iconv, the "translit_extra" data table, the first character of the NFD decomposition
   * and finally "UTF8::str_transliterate()". Single-byte characters are copied unchanged.
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr The replacement used when a character cannot be transliterated.
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // No byte >= 0x80 in the string: it is ASCII already.
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    // Keep the un-normalized string if the normalizer refuses the input.
    $normalized = Normalizer::normalize($s, Normalizer::NFKC);
    if (is_string($normalized)) {
      $s = $normalized;
    }

    $glibc = 'glibc' === ICONV_IMPL;

    // "s" modifier: without it "." skips line feeds and they would vanish from the result.
    if (!preg_match_all('/./us', $s, $matches)) {
      return '';
    }

    /** @noinspection AlterInForeachInspection */
    foreach ($matches[0] as &$c) {

      // single-byte character => nothing to transliterate
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if (is_string($t)) {
          if (!isset($t[0])) {
            $t = '?';
          } elseif (isset($t[1])) {
            // drop the accent marks that precede the base letter in the iconv output
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // iconv failed: treat it like "no transliteration found", so the fallbacks below
      // are tried instead of writing "false" into the result.
      if (!is_string($t)) {
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          // use the base character of the decomposition, if it is an ASCII one
          $decomposed = Normalizer::normalize($c, Normalizer::NFD);

          if (is_string($decomposed) && isset($decomposed[0]) && $decomposed[0] < "\x80") {
            $t = $decomposed[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference to the last element
    unset($c);

    return implode('', $matches[0]);
  }
5421
5422
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @param   string|array $str
   *
   * @return  array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
5433
5434
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
5445
5446
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte conversion, "\uXXXX" escape sequences and decimal numeric
   * entities with two to four digits ("&#NN;" ... "&#NNNN;") are decoded as well.
   *
   * @param string|array $str Any string or array (arrays are converted element by element).
   *
   * @return string|array The same string (or array), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    // length in bytes: the loop below works on single bytes
    $max = self::strlen($str, '8bit');

    $buf = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 0x00-0x7F: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      // 0x80-0xBF: a continuation byte without a lead byte - needs conversion
      if ($ordC1 < 0xC0) {
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          // re-encode the byte as a two-byte UTF-8 sequence (integer shift instead of a
          // float division, so chr() always receives a whole number)
          $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
        }
        continue;
      }

      // 0xC0-0xFF: should be converted to UTF8, if it's not UTF8 already.
      // Number of continuation bytes the lead byte announces:
      if ($ordC1 <= 0xDF) {
        $tail = 1; // looks like 2 bytes UTF8
      } elseif ($ordC1 <= 0xEF) {
        $tail = 2; // looks like 3 bytes UTF8
      } elseif ($ordC1 <= 0xF7) {
        $tail = 3; // looks like 4 bytes UTF8
      } else {
        $tail = 0; // doesn't look like UTF8, but should be converted
      }

      // Collect the announced continuation bytes (0x80-0xBF); a missing or different
      // byte means the lead byte is not part of a UTF-8 sequence.
      $sequence = $c1;
      $isUtf8 = $tail > 0;
      for ($j = 1; $isUtf8 && $j <= $tail; $j++) {
        $pos = $i + $j;
        if ($pos < $max && (ord($str[$pos]) & 0xC0) === 0x80) {
          $sequence .= $str[$pos];
        } else {
          $isUtf8 = false;
        }
      }

      if ($isUtf8) { // yeah, almost sure it's UTF8 already
        $buf .= $sequence;
        $i += $tail;
      } else { // not valid UTF8 - convert this single byte
        $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences ("\uXXXX")
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode decimal numeric entities ("&#NN;" to "&#NNNN;")
    // NOTE(review): the "HTML-ENTITIES" mbstring encoding is reported as deprecated by newer
    // PHP versions - confirm before raising the minimum PHP version.
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
5573
5574
  /**
   * Convert a string into win1252.
   *
   * Arrays are converted element by element (recursively); values that are neither
   * a string nor an array are returned untouched.
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  protected static function to_win1252($str)
  {
    if (is_string($str)) {
      return self::utf8_decode($str);
    }

    if (!is_array($str)) {
      return $str;
    }

    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      $str[$key] = self::to_win1252($value);
    }

    return $str;
  }
5597 1
5598 1
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the original function if the string / chars were pure 7-bit,
   * but the check for ASCII (7-bit) costs more time than we would save here.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // An explicit, non-empty character list: trim both ends with it.
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Default: strip the Unicode "separator" (Z) and "other" (C) categories.
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
5626
5627
  /**
   * Upper-case the first character of a string; the rest is left as it is.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $head = self::substr($str, 0, 1);
    $rest = self::substr($str, 1);

    return self::strtoupper($head) . $rest;
  }
5638
5639
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @param string $str
   *
   * @return string
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
5650
5651
  /**
   * Uppercase for all words in the string.
   *
   * Words are the parts between single spaces. A word that is listed in $exceptions is
   * left as it is - apart from the very first character of the whole result, which is
   * always upper-cased.
   *
   * @param string $str
   * @param array  $exceptions Words that must not be changed (compared strictly).
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // same emptiness check as the other methods: "0" is a regular, non-empty string
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (explode(' ', $str) as $word) {
      if (!$useExceptions || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
5692 2
5693
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // First pass: plain url-decoding, then turn "%uXXXX" escapes into hex entities.
    $urlDecoded = urldecode($str);
    $str = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $urlDecoded);

    // ENT_HTML5 is only referenced when the version check succeeds.
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // Second pass: to UTF-8 -> entities -> raw url-decoding -> repair double encoding.
    $asUtf8 = self::to_utf8($str);
    $withoutEntities = self::html_entity_decode($asUtf8, $flags);
    $repaired = self::fix_simple_utf8(rawurldecode($withoutEntities));

    return (string)$repaired;
  }
5733
5734
  /**
5735
   * Return a array with "urlencoded"-win1252 -> UTF-8
5736
   *
5737
   * @return mixed
5738
   */
5739
  protected static function urldecode_fix_win1252_chars()
5740
  {
5741
    static $array = array(
5742
        '%20' => ' ',
5743
        '%21' => '!',
5744
        '%22' => '"',
5745
        '%23' => '#',
5746
        '%24' => '$',
5747
        '%25' => '%',
5748
        '%26' => '&',
5749
        '%27' => "'",
5750
        '%28' => '(',
5751
        '%29' => ')',
5752
        '%2A' => '*',
5753
        '%2B' => '+',
5754
        '%2C' => ',',
5755
        '%2D' => '-',
5756
        '%2E' => '.',
5757
        '%2F' => '/',
5758
        '%30' => '0',
5759
        '%31' => '1',
5760
        '%32' => '2',
5761
        '%33' => '3',
5762
        '%34' => '4',
5763
        '%35' => '5',
5764
        '%36' => '6',
5765
        '%37' => '7',
5766
        '%38' => '8',
5767
        '%39' => '9',
5768
        '%3A' => ':',
5769
        '%3B' => ';',
5770
        '%3C' => '<',
5771
        '%3D' => '=',
5772
        '%3E' => '>',
5773
        '%3F' => '?',
5774
        '%40' => '@',
5775
        '%41' => 'A',
5776
        '%42' => 'B',
5777
        '%43' => 'C',
5778
        '%44' => 'D',
5779
        '%45' => 'E',
5780
        '%46' => 'F',
5781
        '%47' => 'G',
5782
        '%48' => 'H',
5783
        '%49' => 'I',
5784
        '%4A' => 'J',
5785
        '%4B' => 'K',
5786
        '%4C' => 'L',
5787
        '%4D' => 'M',
5788
        '%4E' => 'N',
5789
        '%4F' => 'O',
5790
        '%50' => 'P',
5791
        '%51' => 'Q',
5792
        '%52' => 'R',
5793
        '%53' => 'S',
5794
        '%54' => 'T',
5795
        '%55' => 'U',
5796
        '%56' => 'V',
5797
        '%57' => 'W',
5798
        '%58' => 'X',
5799
        '%59' => 'Y',
5800
        '%5A' => 'Z',
5801
        '%5B' => '[',
5802
        '%5C' => '\\',
5803
        '%5D' => ']',
5804
        '%5E' => '^',
5805
        '%5F' => '_',
5806
        '%60' => '`',
5807
        '%61' => 'a',
5808
        '%62' => 'b',
5809
        '%63' => 'c',
5810
        '%64' => 'd',
5811
        '%65' => 'e',
5812
        '%66' => 'f',
5813
        '%67' => 'g',
5814
        '%68' => 'h',
5815
        '%69' => 'i',
5816
        '%6A' => 'j',
5817
        '%6B' => 'k',
5818
        '%6C' => 'l',
5819
        '%6D' => 'm',
5820
        '%6E' => 'n',
5821
        '%6F' => 'o',
5822
        '%70' => 'p',
5823
        '%71' => 'q',
5824
        '%72' => 'r',
5825
        '%73' => 's',
5826
        '%74' => 't',
5827
        '%75' => 'u',
5828
        '%76' => 'v',
5829
        '%77' => 'w',
5830
        '%78' => 'x',
5831
        '%79' => 'y',
5832
        '%7A' => 'z',
5833
        '%7B' => '{',
5834
        '%7C' => '|',
5835
        '%7D' => '}',
5836
        '%7E' => '~',
5837
        '%7F' => '',
5838
        '%80' => '`',
5839
        '%81' => '',
5840
        '%82' => '‚',
5841
        '%83' => 'ƒ',
5842
        '%84' => '„',
5843
        '%85' => '…',
5844
        '%86' => '†',
5845
        '%87' => '‡',
5846
        '%88' => 'ˆ',
5847
        '%89' => '‰',
5848
        '%8A' => 'Š',
5849
        '%8B' => '‹',
5850
        '%8C' => 'Œ',
5851
        '%8D' => '',
5852
        '%8E' => 'Ž',
5853
        '%8F' => '',
5854
        '%90' => '',
5855
        '%91' => '‘',
5856
        '%92' => '’',
5857
        '%93' => '“',
5858
        '%94' => '”',
5859
        '%95' => '•',
5860
        '%96' => '–',
5861
        '%97' => '—',
5862
        '%98' => '˜',
5863
        '%99' => '™',
5864
        '%9A' => 'š',
5865
        '%9B' => '›',
5866
        '%9C' => 'œ',
5867
        '%9D' => '',
5868
        '%9E' => 'ž',
5869
        '%9F' => 'Ÿ',
5870
        '%A0' => '',
5871
        '%A1' => '¡',
5872
        '%A2' => '¢',
5873
        '%A3' => '£',
5874
        '%A4' => '¤',
5875
        '%A5' => '¥',
5876
        '%A6' => '¦',
5877
        '%A7' => '§',
5878
        '%A8' => '¨',
5879
        '%A9' => '©',
5880
        '%AA' => 'ª',
5881
        '%AB' => '«',
5882
        '%AC' => '¬',
5883
        '%AD' => '',
5884
        '%AE' => '®',
5885
        '%AF' => '¯',
5886
        '%B0' => '°',
5887
        '%B1' => '±',
5888
        '%B2' => '²',
5889
        '%B3' => '³',
5890
        '%B4' => '´',
5891
        '%B5' => 'µ',
5892
        '%B6' => '¶',
5893
        '%B7' => '·',
5894
        '%B8' => '¸',
5895
        '%B9' => '¹',
5896
        '%BA' => 'º',
5897
        '%BB' => '»',
5898
        '%BC' => '¼',
5899
        '%BD' => '½',
5900
        '%BE' => '¾',
5901
        '%BF' => '¿',
5902
        '%C0' => 'À',
5903
        '%C1' => 'Á',
5904
        '%C2' => 'Â',
5905
        '%C3' => 'Ã',
5906
        '%C4' => 'Ä',
5907
        '%C5' => 'Å',
5908
        '%C6' => 'Æ',
5909
        '%C7' => 'Ç',
5910
        '%C8' => 'È',
5911
        '%C9' => 'É',
5912
        '%CA' => 'Ê',
5913
        '%CB' => 'Ë',
5914
        '%CC' => 'Ì',
5915
        '%CD' => 'Í',
5916
        '%CE' => 'Î',
5917
        '%CF' => 'Ï',
5918
        '%D0' => 'Ð',
5919
        '%D1' => 'Ñ',
5920
        '%D2' => 'Ò',
5921
        '%D3' => 'Ó',
5922
        '%D4' => 'Ô',
5923
        '%D5' => 'Õ',
5924
        '%D6' => 'Ö',
5925
        '%D7' => '×',
5926
        '%D8' => 'Ø',
5927
        '%D9' => 'Ù',
5928
        '%DA' => 'Ú',
5929
        '%DB' => 'Û',
5930
        '%DC' => 'Ü',
5931
        '%DD' => 'Ý',
5932
        '%DE' => 'Þ',
5933
        '%DF' => 'ß',
5934
        '%E0' => 'à',
5935
        '%E1' => 'á',
5936
        '%E2' => 'â',
5937
        '%E3' => 'ã',
5938
        '%E4' => 'ä',
5939
        '%E5' => 'å',
5940
        '%E6' => 'æ',
5941
        '%E7' => 'ç',
5942
        '%E8' => 'è',
5943
        '%E9' => 'é',
5944
        '%EA' => 'ê',
5945
        '%EB' => 'ë',
5946
        '%EC' => 'ì',
5947
        '%ED' => 'í',
5948
        '%EE' => 'î',
5949
        '%EF' => 'ï',
5950
        '%F0' => 'ð',
5951
        '%F1' => 'ñ',
5952
        '%F2' => 'ò',
5953
        '%F3' => 'ó',
5954
        '%F4' => 'ô',
5955
        '%F5' => 'õ',
5956
        '%F6' => 'ö',
5957
        '%F7' => '÷',
5958
        '%F8' => 'ø',
5959
        '%F9' => 'ù',
5960
        '%FA' => 'ú',
5961
        '%FB' => 'û',
5962
        '%FC' => 'ü',
5963
        '%FD' => 'ý',
5964
        '%FE' => 'þ',
5965
        '%FF' => 'ÿ',
5966
    );
5967
5968
    return $array;
5969
  }
5970
5971
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is passed through "UTF8::to_utf8()" first, then the sequences listed in
   * self::$utf8ToWin1252 are replaced before the result is handed to Xml::utf8_decode().
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // search / replace lists, built once from the mapping table
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $utf8 = self::to_utf8($str);
    $mapped = str_replace($search, $replace, $utf8);

    return Xml::utf8_decode($mapped);
  }
6001
6002
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * If the encoded result contains a "\xC2" byte, the sequences listed in
   * self::$cp1252ToUtf8 are replaced afterwards.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists, built once from the mapping table
    static $search = null;
    static $replace = null;

    $encoded = utf8_encode($str);

    // without a "\xC2" byte there is nothing the mapping table could match
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6028
6029
  /**
   * fix -> utf8-win1252 chars
   *
   * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6046
6047
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6062
  /**
   * Limit the number of words in a string.
   *
   * @param  string $str
   * @param  int    $words    Maximum number of words to keep; values below 1 yield an empty string.
   * @param  string $strAddOn Appended when the string had to be shortened.
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;

    // "{1,0}" (or a negative upper bound) is not a valid quantifier: without this guard the
    // pattern would not compile and the whole string would be returned.
    if ($words < 1) {
      return '';
    }

    // optional leading whitespace, followed by up to $words "word + trailing whitespace" groups
    preg_match('/^\s*+(?:\S++\s*+){1,' . $words . '}/u', $str, $matches);

    if (
        !isset($matches[0])
        ||
        self::strlen($str) === self::strlen($matches[0])
    ) {
      // no match, or the match covers the complete string: nothing was cut off
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
6091
6092
  /**
   * Wraps a string to a given number of characters.
   *
   * The native wordwrap() counts bytes, so it is not applied to the text itself: a
   * single-byte "shadow" string is wrapped instead and the positions of its breaks are
   * mapped back onto the real characters.
   *
   * @param string $str
   * @param int    $width
   * @param string $break
   * @param bool   $cut
   *
   * @return false|string Returns the given string wrapped at the specified length.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    // let the native function validate the arguments
    if (false === wordwrap('-', $width, $break, $cut)) {
      return false;
    }

    // $break is compared strictly (===) against string characters further down
    if (!is_string($break)) {
      $break = (string)$break;
    }

    $w = '';
    $str = explode($break, $str);
    $iLen = count($str);
    $chars = array();

    if (1 === $iLen && '' === $str[0]) {
      return '';
    }

    // Build the list of real characters ($chars) and, in parallel, the shadow string ($w):
    // "#" for an existing break, " " for a space and "?" for any other character.
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $iLen; ++$i) {

      if ($i) {
        $chars[] = $break;
        $w .= '#';
      }

      $c = $str[$i];
      unset($str[$i]);

      foreach (self::split($c) as $c) {
        $chars[] = $c;
        $w .= ' ' === $c ? ' ' : '?';
      }
    }

    $str = '';
    $j = 0;
    $b = $i = -1;
    $w = wordwrap($w, $width, '#', $cut);

    // Every "#" in the wrapped shadow string marks a line end: copy the characters in
    // front of it and emit the break.
    while (false !== $b = self::strpos($w, '#', $b + 1)) {
      for (++$i; $i < $b; ++$i) {
        $str .= $chars[$j];
        unset($chars[$j++]);
      }

      // the break replaces an existing break or a space: drop that character
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
        unset($chars[$j++]);
      }

      $str .= $break;
    }

    // whatever is left belongs to the last line
    return $str . implode('', $chars);
  }
6158
6159
  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6168
6169
}
6170