Completed
Push — master ( a54e7f...fb0980 )
by Lars
03:21
created

UTF8   D

Complexity

Total Complexity 659

Size/Duplication

Total Lines 6136
Duplicated Lines 5.17 %

Coupling/Cohesion

Components 3
Dependencies 4

Test Coverage

Coverage 75.31%

Importance

Changes 57
Bugs 17 Features 13
Metric Value
wmc 659
c 57
b 17
f 13
lcom 3
cbo 4
dl 317
loc 6136
ccs 1165
cts 1547
cp 0.7531
rs 4.4102

144 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A iconv_loaded() 0 4 2
A intl_loaded() 0 4 2
A toAscii() 0 4 1
A pcre_utf8_support() 0 5 1
A checkForSupport() 0 14 2
A mbstring_loaded() 0 10 2
C to_ascii() 0 71 15
A replace_diamond_question_mark() 0 14 1
A normalize_whitespace() 0 18 3
A whitespace_table() 0 4 1
A normalize_msword() 0 12 2
A getData() 0 9 2
F str_transliterate() 6 92 21
A showSupport() 0 6 2
B removeBOM() 15 40 6
B clean() 0 36 4
A remove_invisible_characters() 0 20 3
A htmlentities() 0 4 1
A htmlspecialchars() 0 4 1
A isBase64() 0 4 1
A is_base64() 0 14 3
A isUtf8() 0 4 1
D is_utf8() 21 124 22
A strspn() 0 8 4
C substr() 0 57 10
B strlen() 0 22 6
C split() 12 69 22
C rxClass() 0 36 7
C str_split() 0 41 7
A strwidth() 0 7 1
A words_limit() 0 20 4
B str_limit() 0 29 5
B strcspn() 0 19 5
A fits_inside() 0 4 1
A stristr() 0 11 2
A strnatcasecmp() 0 4 1
A strnatcmp() 0 4 2
A strtonatfold() 0 4 1
B strtocasefold() 0 25 4
A strtolower() 0 13 2
B swapCase() 0 26 3
A urldecode() 0 23 3
A fix_simple_utf8() 0 18 3
B html_entity_decode() 0 33 6
D to_utf8() 20 106 26
A toUTF8() 0 4 1
A isJson() 0 18 4
A strstr() 0 6 1
C file_get_contents() 0 42 7
C str_detect_encoding() 0 70 13
B is_binary() 0 17 5
C is_utf16() 47 47 14
A count_chars() 0 8 1
C is_utf32() 47 47 14
A cleanup() 0 23 2
A is_binary_file() 0 12 2
A strrchr() 0 6 1
A strrichr() 0 6 1
A filter_var() 10 10 2
C filter() 34 43 13
A encode() 0 14 3
B normalizeEncoding() 0 24 2
A to_latin1() 0 4 1
A to_win1252() 0 16 4
A utf8_decode() 0 23 3
A filter_input() 10 10 2
A utf8_encode() 0 19 3
A json_encode() 0 12 2
A json_decode() 0 12 2
A filter_var_array() 10 10 2
A filter_input_array() 10 10 2
A strpbrk() 0 8 2
A strncasecmp() 0 4 1
A strncmp() 0 4 1
A strcmp() 0 7 2
A max_chr_width() 0 9 2
A chr_size_list() 0 8 2
A single_chr_html_encode() 0 8 2
C ord() 0 23 9
A html_encode() 0 12 1
A file_has_bom() 0 4 1
A is_bom() 0 4 1
A bom() 0 4 1
A isBom() 0 4 1
A string_has_bom() 0 4 1
A add_bom_to_string() 0 8 2
A str_shuffle() 0 8 1
C wordwrap() 0 56 13
C strpos() 6 54 13
A chr() 0 13 3
A hex_to_int() 0 8 2
A strrev() 0 4 1
A max() 8 8 2
B codepoints() 0 26 3
A min() 8 8 2
A chr_to_hex() 0 4 1
A int_to_hex() 0 12 3
A str_to_binary() 0 19 3
B str_word_count() 0 25 5
A trim() 0 15 4
A rtrim() 12 12 3
A ltrim() 12 12 3
C substr_replace() 0 66 14
A toLatin1() 0 4 1
A substr_count() 0 16 4
A isAscii() 0 4 1
A is_ascii() 0 4 1
D range() 14 38 9
B hash() 0 40 5
A callback() 0 4 1
A chr_map() 0 6 1
A access() 0 6 1
A str_sort() 0 16 3
A string() 0 12 1
A strip_tags() 0 7 1
C str_pad() 9 35 7
A str_repeat() 0 6 1
A remove_duplicates() 0 14 4
B stripos() 0 24 6
A fix_utf8() 0 20 4
A ucword() 0 4 1
A ucfirst() 0 4 1
B strtoupper() 0 31 4
B case_table() 0 1001 1
A strtr() 0 19 4
A substr_compare() 0 7 2
A strcasecmp() 0 4 1
C ucwords() 0 33 7
A number_format() 0 20 4
A str_replace() 0 4 1
A str_ireplace() 0 18 3
A lcfirst() 0 4 1
A strripos() 0 4 1
C strrpos() 6 51 12
A chunk_split() 0 4 1
A to_iso8859() 0 4 1
A utf8_fix_win1252_chars() 0 4 1
A ws() 0 4 1
A parse_str() 0 9 1
D getCharDirection() 0 92 115
B chr_to_decimal() 0 32 6
A decimal_to_chr() 0 10 1
B urldecode_fix_win1252_chars() 0 231 1

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

Complex Class

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This often can reduce the size of classes significantly.

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace voku\helper;
4
5
use Patchwork\PHP\Shim\Intl;
6
use Patchwork\PHP\Shim\Normalizer;
7
use Patchwork\PHP\Shim\Xml;
8
9
/**
10
 * UTF8-Helper-Class
11
 *
12
 * @package voku\helper
13
 */
14
class UTF8
15
{
16
  /**
17
   * @var array
18
   */
19
  protected static $win1252ToUtf8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
  );
48
49
  /**
50
   * @var array
51
   */
52
  protected static $cp1252ToUtf8 = array(
53
      '€' => '€',
54
      '‚' => '‚',
55
      'ƒ' => 'ƒ',
56
      '„' => '„',
57
      '…' => '…',
58
      '†' => '†',
59
      '‡' => '‡',
60
      'ˆ' => 'ˆ',
61
      '‰' => '‰',
62
      'Š' => 'Š',
63
      '‹' => '‹',
64
      'Œ' => 'Œ',
65
      'Ž' => 'Ž',
66
      '‘' => '‘',
67
      '’' => '’',
68
      '“' => '“',
69
      '”' => '”',
70
      '•' => '•',
71
      '–' => '–',
72
      '—' => '—',
73
      '˜' => '˜',
74
      '™' => '™',
75
      'š' => 'š',
76
      '›' => '›',
77
      'œ' => 'œ',
78
      'ž' => 'ž',
79
      'Ÿ' => 'Ÿ',
80
  );
81
82
  /**
83
   * Numeric code point => UTF-8 Character
84
   *
85
   * @var array
86
   */
87
  protected static $whitespace = array(
88
      0     => "\x0",
89
      //NUL Byte
90
      9     => "\x9",
91
      //Tab
92
      10    => "\xa",
93
      //New Line
94
      11    => "\xb",
95
      //Vertical Tab
96
      13    => "\xd",
97
      //Carriage Return
98
      32    => "\x20",
99
      //Ordinary Space
100
      160   => "\xc2\xa0",
101
      //NO-BREAK SPACE
102
      5760  => "\xe1\x9a\x80",
103
      //OGHAM SPACE MARK
104
      6158  => "\xe1\xa0\x8e",
105
      //MONGOLIAN VOWEL SEPARATOR
106
      8192  => "\xe2\x80\x80",
107
      //EN QUAD
108
      8193  => "\xe2\x80\x81",
109
      //EM QUAD
110
      8194  => "\xe2\x80\x82",
111
      //EN SPACE
112
      8195  => "\xe2\x80\x83",
113
      //EM SPACE
114
      8196  => "\xe2\x80\x84",
115
      //THREE-PER-EM SPACE
116
      8197  => "\xe2\x80\x85",
117
      //FOUR-PER-EM SPACE
118
      8198  => "\xe2\x80\x86",
119
      //SIX-PER-EM SPACE
120
      8199  => "\xe2\x80\x87",
121
      //FIGURE SPACE
122
      8200  => "\xe2\x80\x88",
123
      //PUNCTUATION SPACE
124
      8201  => "\xe2\x80\x89",
125
      //THIN SPACE
126
      8202  => "\xe2\x80\x8a",
127
      //HAIR SPACE
128
      8232  => "\xe2\x80\xa8",
129
      //LINE SEPARATOR
130
      8233  => "\xe2\x80\xa9",
131
      //PARAGRAPH SEPARATOR
132
      8239  => "\xe2\x80\xaf",
133
      //NARROW NO-BREAK SPACE
134
      8287  => "\xe2\x81\x9f",
135
      //MEDIUM MATHEMATICAL SPACE
136
      12288 => "\xe3\x80\x80"
137
      //IDEOGRAPHIC SPACE
138
  );
139
140
  /**
141
   * @var array
142
   */
143
  protected static $whitespaceTable = array(
144
      'SPACE'                     => "\x20",
145
      'NO-BREAK SPACE'            => "\xc2\xa0",
146
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
147
      'EN QUAD'                   => "\xe2\x80\x80",
148
      'EM QUAD'                   => "\xe2\x80\x81",
149
      'EN SPACE'                  => "\xe2\x80\x82",
150
      'EM SPACE'                  => "\xe2\x80\x83",
151
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
152
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
153
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
154
      'FIGURE SPACE'              => "\xe2\x80\x87",
155
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
156
      'THIN SPACE'                => "\xe2\x80\x89",
157
      'HAIR SPACE'                => "\xe2\x80\x8a",
158
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
159
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
160
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
161
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  protected static $commonCaseFold = array(
168
      'ſ'            => 's',
169
      "\xCD\x85"     => 'ι',
170
      'ς'            => 'σ',
171
      "\xCF\x90"     => 'β',
172
      "\xCF\x91"     => 'θ',
173
      "\xCF\x95"     => 'φ',
174
      "\xCF\x96"     => 'π',
175
      "\xCF\xB0"     => 'κ',
176
      "\xCF\xB1"     => 'ρ',
177
      "\xCF\xB5"     => 'ε',
178
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
179
      "\xE1\xBE\xBE" => 'ι',
180
  );
181
182
  /**
183
   * @var array
184
   */
185
  protected static $brokenUtf8ToUtf8 = array(
186
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
187
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
188
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
189
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
190
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
191
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
192
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
193
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
194
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
195
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
196
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
197
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
198
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
199
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
200
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
201
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
202
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
203
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
204
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
205
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
206
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
207
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
208
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
209
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
210
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
211
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
212
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
213
      'ü'       => 'ü',
214
      'ä'       => 'ä',
215
      'ö'       => 'ö',
216
      'Ö'       => 'Ö',
217
      'ß'       => 'ß',
218
      'Ã '       => 'à',
219
      'á'       => 'á',
220
      'â'       => 'â',
221
      'ã'       => 'ã',
222
      'ù'       => 'ù',
223
      'ú'       => 'ú',
224
      'û'       => 'û',
225
      'Ù'       => 'Ù',
226
      'Ú'       => 'Ú',
227
      'Û'       => 'Û',
228
      'Ü'       => 'Ü',
229
      'ò'       => 'ò',
230
      'ó'       => 'ó',
231
      'ô'       => 'ô',
232
      'è'       => 'è',
233
      'é'       => 'é',
234
      'ê'       => 'ê',
235
      'ë'       => 'ë',
236
      'À'       => 'À',
237
      'Á'       => 'Á',
238
      'Â'       => 'Â',
239
      'Ã'       => 'Ã',
240
      'Ä'       => 'Ä',
241
      'Ã…'       => 'Å',
242
      'Ç'       => 'Ç',
243
      'È'       => 'È',
244
      'É'       => 'É',
245
      'Ê'       => 'Ê',
246
      'Ë'       => 'Ë',
247
      'ÃŒ'       => 'Ì',
248
      'Í'       => 'Í',
249
      'ÃŽ'       => 'Î',
250
      'Ï'       => 'Ï',
251
      'Ñ'       => 'Ñ',
252
      'Ã’'       => 'Ò',
253
      'Ó'       => 'Ó',
254
      'Ô'       => 'Ô',
255
      'Õ'       => 'Õ',
256
      'Ø'       => 'Ø',
257
      'Ã¥'       => 'å',
258
      'æ'       => 'æ',
259
      'ç'       => 'ç',
260
      'ì'       => 'ì',
261
      'í'       => 'í',
262
      'î'       => 'î',
263
      'ï'       => 'ï',
264
      'ð'       => 'ð',
265
      'ñ'       => 'ñ',
266
      'õ'       => 'õ',
267
      'ø'       => 'ø',
268
      'ý'       => 'ý',
269
      'ÿ'       => 'ÿ',
270
      '€'      => '€',
271
  );
272
273
  /**
274
   * @var array
275
   */
276
  protected static $utf8ToWin1252 = array(
277
      "\xe2\x82\xac" => "\x80", // EURO SIGN
278
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
279
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
280
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
281
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
282
      "\xe2\x80\xa0" => "\x86", // DAGGER
283
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
284
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
285
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
286
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
287
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
288
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
289
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
290
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
291
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
292
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
293
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
294
      "\xe2\x80\xa2" => "\x95", // BULLET
295
      "\xe2\x80\x93" => "\x96", // EN DASH
296
      "\xe2\x80\x94" => "\x97", // EM DASH
297
      "\xcb\x9c"     => "\x98", // SMALL TILDE
298
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
299
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
300
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
301
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
302
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
303
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
304
  );
305
306
  /**
307
   * @var array
308
   */
309
  protected static $utf8MSWord = array(
310
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
311
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
312
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
313
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
314
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
315
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
316
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
317
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
318
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
319
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
320
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
321
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
322
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
323
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
324
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
325
  );
326
327
  /**
328
   * @var array
329
   */
330
  private static $support = array();
331
332
  /**
   * Creates the helper instance.
   *
   * Instantiating the class triggers the same support detection that the
   * static API relies on, see checkForSupport().
   */
  public function __construct()
  {
    self::checkForSupport();
  }
339
340
  /**
   * Detects the UTF-8 related capabilities of the runtime and boots the portability layer.
   *
   * The results are stored in self::$support under the keys "mbstring", "iconv",
   * "intl" and "pcre_utf8". Everything is skipped as soon as the "mbstring" entry
   * exists, so only the first call does any work.
   */
  public static function checkForSupport()
  {
    // an earlier call already filled the support table
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();

    Bootup::initAll(); // enables the portability layer and configures PHP for UTF-8
    Bootup::filterRequestUri(); // redirects to a UTF-8 encoded URL if that is not already the case
    Bootup::filterRequestInputs(); // normalizes HTTP inputs to UTF-8 NFC
  }
357
358
  /**
   * Checks whether the "mbstring" extension is available.
   *
   * Side effect: when the extension is present, its internal encoding is
   * switched to UTF-8.
   *
   * @return bool true if available, false otherwise
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    mb_internal_encoding('UTF-8');

    return true;
  }
373
374
  /**
   * Checks whether the "iconv" extension is available.
   *
   * @return bool true if available, false otherwise
   */
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean
    return extension_loaded('iconv');
  }
383
384
  /**
   * Checks whether the "intl" extension is available.
   *
   * @return bool true if available, false otherwise
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean
    return extension_loaded('intl');
  }
393
394
  /**
   * Checks whether PCRE accepts the "/u" pattern modifier, i.e. whether
   * regular expressions can be run in UTF-8 mode.
   *
   * The probe matches an empty pattern against an empty string; warnings of
   * a PCRE build without UTF-8 support are suppressed on purpose.
   *
   * @return bool true if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
404
405
  /**
   * Alias of UTF8::to_ascii().
   *
   * @param string $s         The input string, e.g. a UTF-8 string.
   * @param string $subst_chr Passed through unchanged to to_ascii().
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    return self::to_ascii($s, $subst_chr);
  }
417
418
  /**
   * Converts a string to ASCII.
   *
   * The input is sanitized with clean() first. If it still contains bytes
   * >= 0x80, it is NFKC-normalized and every multi-byte character is
   * transliterated on its own:
   *
   *  1. iconv() with //TRANSLIT
   *  2. if that yields "?" (or fails): the "translit_extra" data table
   *  3. otherwise the leading ASCII character of the NFD decomposition,
   *     or $subst_chr when there is none
   *  4. if the result is still "?": str_transliterate()
   *
   * @param string $s         The input string, e.g. a UTF-8 string.
   * @param string $subst_chr Replacement for characters without an ASCII equivalent.
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // nothing to do for pure ASCII input
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = Normalizer::normalize($s, Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // The "s" (DOTALL) modifier is required: without it "." does not match
    // line feeds, and they would silently vanish when the characters are
    // glued back together below.
    preg_match_all('/./us', $s, $matches);
    $chars = isset($matches[0]) ? $matches[0] : array();

    /** @noinspection AlterInForeachInspection */
    foreach ($chars as &$c) {

      // single-byte characters are already ASCII
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if (is_string($t)) {
          if (!isset($t[0])) {
            $t = '?';
          } elseif (isset($t[1])) {
            // strip the accent markers that are put in front of the base letter
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // a failed iconv() call is treated like an untranslatable character,
      // so that the fallbacks below get a chance instead of emitting "false"
      if (!is_string($t)) {
        $t = '?';
      }

      if ('?' === $t) {

        // the extra table is loaded lazily, once per request
        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = Normalizer::normalize($c, Normalizer::NFD);

          // keep the base character if the decomposition starts with ASCII
          if (is_string($t) && isset($t[0]) && $t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference, otherwise a later write to $c would alter the last element
    unset($c);

    return implode('', $chars);
  }
497
498
  /**
   * Sanitizes a string so that only well-formed UTF-8 sequences of one to
   * three bytes remain.
   *
   * Every byte that is not part of such a sequence is dropped. Because the
   * pattern has no alternative for 4-byte sequences, those are removed as well.
   * Afterwards the replacement character (U+FFFD) and invisible control
   * characters are stripped; the remaining steps are optional.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              Set to true to strip a leading BOM.
   * @param bool   $normalize_whitespace    Set to true to replace special whitespace with a plain space.
   * @param bool   $normalize_msword        Set to true to replace MS Word characters, e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space Set to true to keep non-breaking-spaces while normalizing whitespace.
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings
    // NOTE(review): the {1,50} cap below is presumably the workaround for that problem - confirm

    $validUtf8Runs = '/
       (
        (?: [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
        |   [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
        |   \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xE1-\xEC][\x80-\xBF]{2}
        |   \xED[\x80-\x9F][\x80-\xBF]
        |   [\xEE-\xEF][\x80-\xBF]{2}
        ){1,50}                          # ...one or more times
       )
       | .                               # anything else
       /x';

    // valid runs are captured and kept via "$1", every other byte is replaced by nothing
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark(
            preg_replace($validUtf8Runs, '$1', $str),
            ''
        )
    );

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
545
546
  /**
   * Replaces the diamond question mark (�, U+FFFD REPLACEMENT CHARACTER).
   *
   * @param string $str     The string to search in.
   * @param string $unknown The text that is inserted instead, "?" by default.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the character is listed as escaped byte sequence and as literal
    $needles = array("\xEF\xBF\xBD", '�');
    $replacements = array($unknown, $unknown);

    return str_replace($needles, $replacements, $str);
  }
568
569
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * Removed are all control characters except horizontal tab (dec 09),
   * newline (dec 10) and carriage return (dec 13), plus DEL (dec 127).
   *
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str         The string to be cleaned.
   * @param  bool   $url_encoded Set to true to also remove the percent-encoded
   *                             form of these characters (e.g. "%00").
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" equals "%0B"), hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // removing one match can glue a new one together (e.g. "%%0000"),
    // therefore repeat until a pass replaces nothing
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
602
603
  /**
   * Normalize the whitespace.
   *
   * Every character of self::$whitespaceTable is replaced by an ordinary
   * space (U+0020).
   *
   * @param string $str                  The string to be normalized.
   * @param bool   $keepNonBreakingSpace Set to true, to keep non-breaking-spaces.
   *                                     Only a strict boolean true is honoured.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false)
  {
    // search lists, built once per variant
    static $whitespaces = array();

    // The cache key is derived from the strict decision instead of the raw
    // argument. Otherwise a call with 1 would store the full list in the very
    // slot that a later call with true reads from, and non-scalar arguments
    // would fail as array offset.
    $keep = ($keepNonBreakingSpace === true);
    $cacheKey = $keep ? 'keep' : 'replace';

    if (!isset($whitespaces[$cacheKey])) {

      $table = self::$whitespaceTable;

      if ($keep) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
629
630
  /**
   * Returns the table of UTF-8 whitespace characters used by this class.
   *
   * @see    http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author Derek E.
   *
   * @return array the whitespace characters as values, keyed by the name of
   *               the whitespace type as defined in the URL above
   */
  public static function whitespace_table()
  {
    return self::$whitespaceTable;
  }
644
645
  /**
   * Normalize MS Word special characters.
   *
   * Replaces the characters listed in self::$utf8MSWord (typographic quotes,
   * dashes, ellipsis) with their plain ASCII counterparts.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists, split from the map on first use only
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replace, $str);
  }
664
665
  /**
666
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
667
   *
668
   * @param string $str
669
   *
670
   * @return string
671 7
   */
672
  public static function removeBOM($str = '')
673
  {
674
    // UTF-32 (BE)
0 ignored issues
show
Unused Code Comprehensibility introduced by
38% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
675 7
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
676 1
    /** @noinspection SubStrUsedAsStrPosInspection */
677 1 View Code Duplication
    if (substr($str, 0, 4) == @pack('CCCC', 0x00, 0x00, 0xfe, 0xff)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
678
      $str = substr($str, 4);
679
    }
680 7
681 1
    // UTF-32 (LE)
0 ignored issues
show
Unused Code Comprehensibility introduced by
38% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
682 1
683
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
684
    /** @noinspection SubStrUsedAsStrPosInspection */
685 7 View Code Duplication
    if (substr($str, 0, 4) == @pack('CCCC', 0xff, 0xfe, 0x00, 0x00)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
686 2
      $str = substr($str, 4);
687 2
    }
688
689
    // UTF-8
690 7
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
691 1
    /** @noinspection SubStrUsedAsStrPosInspection */
692 1 View Code Duplication
    if (substr($str, 0, 3) == @pack('CCC', 0xef, 0xbb, 0xbf)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
693
      $str = substr($str, 3);
694
    }
695 7
696 1
    // UTF-16 (BE)
0 ignored issues
show
Unused Code Comprehensibility introduced by
38% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
697 1
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
698
    /** @noinspection SubStrUsedAsStrPosInspection */
699 7 View Code Duplication
    if (substr($str, 0, 2) == @pack('CC', 0xfe, 0xff)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
700
      $str = substr($str, 2);
701
    }
702
703
    // UTF-16 (LE)
0 ignored issues
show
Unused Code Comprehensibility introduced by
38% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
704
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
705
    /** @noinspection SubStrUsedAsStrPosInspection */
706 View Code Duplication
    if (substr($str, 0, 2) == @pack('CC', 0xff, 0xfe)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
707
      $str = substr($str, 2);
708
    }
709 2
710
    return $str;
711 2
  }
712 2
713 2
  /**
   * Load a serialized data set from this library's "data" directory.
   *
   * The path is built as __DIR__ . '/data/' . $file . '.ser' and the file content is passed to unserialize().
   *
   * @param string $file Base name of the data file, without the directory and without the ".ser" extension.
   *
   * @return bool|string|array|int The unserialized value, or false when the file does not exist.
   */
  protected static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.ser';

    // nothing to load: report the error as false
    if (!file_exists($path)) {
      return false;
    }

    return unserialize(file_get_contents($path));
  }
729
730
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   * Be aware that the input is split into an array holding one entry per character and the result is
   * joined from that array, so memory use grows with the length of the input string.
   *
   * Every non-ASCII character is decoded to its code point and looked up in a per-"bank" table
   * (bank = code point >> 8) that is loaded on demand from "data/xNN.php" and kept for later calls.
   * Characters that cannot be decoded or have no table entry are replaced by $unknown.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @author <[email protected]>
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    // Lookup tables, indexed by bank. The variable name must not change: the included bank files
    // run in this scope and are expected to fill $UTF8_TO_ASCII[$bank] (assumption - confirm against data/*.php).
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    // when the match fails there is no character list; treat that as "no characters"
    $chars = isset($ar[0]) ? $ar[0] : array();

    foreach ($chars as $index => $char) {

      $lead = ord($char[0]);

      // ASCII - next please
      if ($lead <= 127) {
        continue;
      }

      // 128-191 are continuation bytes and 254/255 never start a sequence
      if ($lead < 192 || $lead > 253) {
        $chars[$index] = $unknown;
        continue;
      }

      // sequence length and payload bits of the lead byte
      if ($lead <= 223) {
        $size = 2;
        $ord = $lead - 192;
      } elseif ($lead <= 239) {
        $size = 3;
        $ord = $lead - 224;
      } elseif ($lead <= 247) {
        $size = 4;
        $ord = $lead - 240;
      } elseif ($lead <= 251) {
        $size = 5;
        $ord = $lead - 248;
      } else {
        $size = 6;
        $ord = $lead - 252;
      }

      // a truncated sequence cannot be decoded
      if (strlen($char) < $size) {
        $chars[$index] = $unknown;
        continue;
      }

      // every following byte contributes six bits; $ord is rebuilt from scratch for each
      // character, so no value from a previous character can leak into this one
      for ($i = 1; $i < $size; $i++) {
        $ord = $ord * 64 + (ord($char[$i]) - 128);
      }

      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          /** @noinspection PhpIncludeInspection */
          include $bankfile;
        }

        // missing file, or a file that did not provide a table for this bank
        if (!isset($UTF8_TO_ASCII[$bank]) || !is_array($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $chars[$index] = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $chars[$index] = $unknown;
      }
    }

    return implode('', $chars);
  }
840
841
  /**
   * Echo the entries of self::$support, e.g. for debugging.
   *
   * Every value is printed followed by "\n<br>".
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportEntry) {
      echo "{$supportEntry}\n<br>";
    }
  }
850
851
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * Forwards all arguments unchanged to PHP's htmlentities(); $encoding defaults to 'UTF-8'.
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           The input string.
   * @param int    $flags         [optional] A bitmask of one or more of the following flags, which specify how to
   *                              handle quotes, invalid code unit sequences and the used document type.
   *                              The default is ENT_COMPAT.
   *                              - ENT_COMPAT:     Will convert double-quotes and leave single-quotes alone.
   *                              - ENT_QUOTES:     Will convert both double and single quotes.
   *                              - ENT_NOQUOTES:   Will leave both double and single quotes unconverted.
   *                              - ENT_IGNORE:     Silently discard invalid code unit sequences instead of
   *                                                returning an empty string. Using this flag is discouraged as
   *                                                it may have security implications.
   *                              - ENT_SUBSTITUTE: Replace invalid code unit sequences with a Unicode Replacement
   *                                                Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
   *                                                returning an empty string.
   *                              - ENT_DISALLOWED: Replace invalid code points for the given document type with a
   *                                                Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
   *                                                (otherwise) instead of leaving them as is. This may be useful,
   *                                                for instance, to ensure the well-formedness of XML documents
   *                                                with embedded external content.
   *                              - ENT_HTML401:    Handle code as HTML 4.01.
   *                              - ENT_XML1:       Handle code as XML 1.
   *                              - ENT_XHTML:      Handle code as XHTML.
   *                              - ENT_HTML5:      Handle code as HTML 5.
   * @param string $encoding      [optional] Defines the encoding used in the conversion. Although this argument
   *                              is technically optional, you are highly encouraged to specify the correct value
   *                              for your code.
   * @param bool   $double_encode [optional] When double_encode is turned off PHP will not encode existing html
   *                              entities. The default is to convert everything.
   *
   * @return string The encoded string. If the input string contains an invalid code unit sequence within the
   *                given encoding an empty string will be returned, unless either the ENT_IGNORE or
   *                ENT_SUBSTITUTE flags are set.
   */
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
958
959
  /**
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
   *
   * Forwards all arguments unchanged to PHP's htmlspecialchars(); $encoding defaults to 'UTF-8'.
   *
   * @link http://php.net/manual/en/function.htmlspecialchars.php
   *
   * @param string $str           The string being converted.
   * @param int    $flags         [optional] A bitmask of one or more of the following flags, which specify how to
   *                              handle quotes, invalid code unit sequences and the used document type.
   *                              The default is ENT_COMPAT.
   *                              - ENT_COMPAT:     Will convert double-quotes and leave single-quotes alone.
   *                              - ENT_QUOTES:     Will convert both double and single quotes.
   *                              - ENT_NOQUOTES:   Will leave both double and single quotes unconverted.
   *                              - ENT_IGNORE:     Silently discard invalid code unit sequences instead of
   *                                                returning an empty string. Using this flag is discouraged as
   *                                                it may have security implications.
   *                              - ENT_SUBSTITUTE: Replace invalid code unit sequences with a Unicode Replacement
   *                                                Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
   *                                                returning an empty string.
   *                              - ENT_DISALLOWED: Replace invalid code points for the given document type with a
   *                                                Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
   *                                                (otherwise) instead of leaving them as is. This may be useful,
   *                                                for instance, to ensure the well-formedness of XML documents
   *                                                with embedded external content.
   *                              - ENT_HTML401:    Handle code as HTML 4.01.
   *                              - ENT_XML1:       Handle code as XML 1.
   *                              - ENT_XHTML:      Handle code as XHTML.
   *                              - ENT_HTML5:      Handle code as HTML 5.
   * @param string $encoding      [optional] Defines the encoding used in the conversion.
   *                              For the purposes of this function, the encodings ISO-8859-1, ISO-8859-15, UTF-8,
   *                              cp866, cp1251, cp1252, and KOI8-R are effectively equivalent, provided the string
   *                              itself is valid for the encoding, as the characters affected by
   *                              htmlspecialchars occupy the same positions in all of these encodings.
   * @param bool   $double_encode [optional] When double_encode is turned off PHP will not encode existing html
   *                              entities, the default is to convert everything.
   *
   * @return string The converted string. If the input string contains an invalid code unit sequence within the
   *                given encoding an empty string will be returned, unless either the ENT_IGNORE or
   *                ENT_SUBSTITUTE flags are set.
   */
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $converted = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
1070 16
1071
  /**
   * Alias for "UTF8::is_base64": the argument is passed through and the result is returned unchanged.
   *
   * @param string $str
   *
   * @return bool
   */
  public static function isBase64($str)
  {
    $result = self::is_base64($str);

    return $result;
  }
1082
1083
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The check is a round trip: the input is strictly decoded and encoded again, and only an
   * identical result counts as base64. An empty string is reported as base64 encoded.
   *
   * @param string $str
   *
   * @return bool Whether or not $str is base64 encoded
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return true;
    }

    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
1104 29
1105 29
  /**
   * Alias for "UTF8::is_utf8": the argument is passed through and the result is returned unchanged.
   *
   * @param string $str
   *
   * @return bool
   */
  public static function isUtf8($str)
  {
    $result = self::is_utf8($str);

    return $result;
  }
1116
1117 25
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * The string is validated byte by byte with a small state machine, so the result does not depend
   * on PCRE being compiled with UTF-8 support. A sequence is rejected when it
   * - starts with a byte that cannot begin a character (stray continuation byte, 5/6 octet lead, 0xFE, 0xFF),
   * - is interrupted by a byte that is not a continuation byte,
   * - is not in shortest form (illegal since Unicode 3.1),
   * - encodes a surrogate (illegal since Unicode 3.2) or a code point above 0x10FFFF,
   * - is cut off by the end of the string.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param    string $str The string to be checked.
   *
   * @return   bool true for an empty string and for valid UTF-8, false otherwise
   */
  public static function is_utf8($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    $pending = 0;   // continuation octets still expected for the current character
    $codePoint = 0; // code point assembled so far
    $minimum = 0;   // smallest code point that may legally use the current sequence length
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $octet = ord($str[$i]);

      if ($pending === 0) {
        // Start of a character: either US-ASCII or the first octet of a multi-octet sequence.
        if ($octet < 0x80) {
          continue;
        }

        if (($octet & 0xE0) === 0xC0) {
          // First octet of 2 octet sequence.
          $codePoint = $octet & 0x1F;
          $pending = 1;
          $minimum = 0x80;
        } elseif (($octet & 0xF0) === 0xE0) {
          // First octet of 3 octet sequence.
          $codePoint = $octet & 0x0F;
          $pending = 2;
          $minimum = 0x800;
        } elseif (($octet & 0xF8) === 0xF0) {
          // First octet of 4 octet sequence.
          $codePoint = $octet & 0x07;
          $pending = 3;
          $minimum = 0x10000;
        } else {
          // Stray continuation octet, or the first octet of a 5/6 octet sequence: those are
          // always either not the shortest form or outside the Unicode range of 0-0x10FFFF.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only continuation octets are legal.
      if (($octet & 0xC0) !== 0x80) {
        return false;
      }

      $codePoint = ($codePoint << 6) | ($octet & 0x3F);

      if (--$pending === 0) {
        // End of the multi-octet sequence: check for illegal sequences and code points.
        if (
            $codePoint < $minimum ||
            ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) ||
            $codePoint > 0x10FFFF
        ) {
          return false;
        }
      }
    }

    // A string that ends in the middle of a multi-octet sequence is not valid UTF-8.
    return $pending === 0;
  }
1250
1251 37
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * When $start or $len differ from their defaults, the string is first cut with self::substr(). The mask is
   * turned into a character class by self::rxClass() and the matched prefix is measured with self::strlen().
   *
   * @param string $s     The string to examine.
   * @param string $mask  The characters that may appear in the initial segment.
   * @param int    $start Position at which the examination starts.
   * @param int    $len   Length of the part that is examined.
   *
   * @return int|null Length of the matching prefix, 0 if the string does not start with a mask character.
   */
  public static function strspn($s, $mask, $start = 0, $len = 2147483647)
  {
    $isSliceRequested = ($start || 2147483647 != $len);
    if ($isSliceRequested) {
      $s = self::substr($s, $start, $len);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $s, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
1270 31
1271
  /**
   * Get part of a string.
   *
   * Strategy, in this order:
   * 1. mb_substr() when self::$support['mbstring'] is true,
   * 2. grapheme_substr() (with a workaround when PHP bug 62759 is detected) when self::$support['iconv']
   *    is true and grapheme_substr() is actually available,
   * 3. otherwise the string is split into characters with self::split() and the slice is joined again.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       The string to take the part from.
   * @param int     $start     The first position used in str.
   * @param int     $length    [optional] The maximum length of the returned string;
   *                           null means "up to the end of the string".
   * @param string  $encoding  Charset passed to mb_substr(); a boolean is treated as 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string The portion of str specified by the start and length parameters;
   *                an empty string for empty input.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    static $bug62759;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr

      $str = self::clean($str);
    }

    if ($length === null) {
      $length = (int)self::strlen($str);
    } else {
      $length = (int)$length;
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return mb_substr($str, $start, $length, $encoding);
    }

    // grapheme_substr() belongs to the intl extension, so the 'iconv' flag alone does not
    // guarantee that it exists; without the extra check this branch is a fatal error on
    // installations that have iconv but not intl.
    // NOTE(review): the flag probably should be the intl one - confirm against checkForSupport().
    if (self::$support['iconv'] === true && function_exists('grapheme_substr')) {

      if (!isset($bug62759)) {
        // detect PHP bug 62759 once per request
        $bug62759 = ('à' === grapheme_substr('éà', 1, -2));
      }

      if ($bug62759) {
        return (string)Intl::grapheme_substr_workaround62759($str, $start, $length);
      } else {
        return (string)grapheme_substr($str, $start, $length);
      }
    }

    // fallback

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode(array_slice($array, $start, $length));
  }
1348
1349
  /**
1350
   * Get the string length, not the byte-length!
1351
   *
1352
   * @link     http://php.net/manual/en/function.mb-strlen.php
1353 24
   *
1354
   * @param string  $str    The string being checked for length.
1355 24
   * @param string  $encoding  Set the charset for e.g. "mb_" function
1356
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
1357 24
   *
1358 5
   * @return int the number of characters in
1359
   *           string str having character encoding
1360
   *           encoding. A multi-byte character is
1361
   *           counted as 1.
1362 23
   */
1363 23
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    // Make sure the support flags have been initialised.
    self::checkForSupport();

    // INFO: fallback for old call sites that passed a boolean as 2nd argument.
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    // Cleaning is only applied when the string is treated as UTF-8.
    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return mb_strlen($str, $encoding);
  }
1385
1386
  /**
1387
   * Convert a string to an array of Unicode characters.
1388
   *
1389
   * @param    string  $str       The string to split into array.
1390
   * @param    int     $length    Max character length of each array element.
1391
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string.
1392
   *
1393
   * @return   array An array containing chunks of the string.
1394
   */
1395
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return array();
    }

    // Make sure self::$support has been populated.
    self::checkForSupport();

    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      // $cleanUtf8 is only honoured on this (PCRE) path.
      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // "." with the /u and /s modifiers matches one code point at a time.
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // Fallback: walk the bytes and assemble UTF-8 sequences by hand.
      $byteCount = strlen($str);
      $i = 0;

      while ($i < $byteCount) {
        $lead = $str[$i];

        // Sequence width implied by the lead byte (0 = not a lead byte).
        if (($lead & "\x80") === "\x00") {
          $width = 1;
        } elseif (($lead & "\xE0") === "\xC0") {
          $width = 2;
        } elseif (($lead & "\xF0") === "\xE0") {
          $width = 3;
        } elseif (($lead & "\xF8") === "\xF0") {
          $width = 4;
        } else {
          $width = 0;
        }

        // The whole sequence must exist and every trailing byte must be
        // a continuation byte (10xxxxxx).
        $char = $lead;
        $valid = ($width > 0) && isset($str[$i + $width - 1]);
        for ($k = 1; $valid && $k < $width; $k++) {
          $valid = (($str[$i + $k] & "\xC0") === "\x80");
          $char .= $str[$i + $k];
        }

        if ($valid) {
          $chars[] = $char;
          $i += $width;
        } else {
          // Unusable byte: skip it without emitting anything.
          $i++;
        }
      }
    }

    // Group the single characters into chunks of $length characters.
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
1464 16
1465
  /**
1466
   * rxClass
1467
   *
1468
   * @param string $s
1469
   * @param string $class
1470
   *
1471
   * @return string
1472
   */
1473
  protected static function rxClass($s, $class = '')
  {
    // Two-level cache ([$class][$s]). A flat "$s . $class" key is ambiguous:
    // e.g. ('', 'a-z') and ('a-z', '') would share one key although they
    // produce different patterns ("[a-z]" vs. "[-az]").
    static $rxClassCache = array();

    $charList = (string)$s;
    $prefix = (string)$class;

    if (isset($rxClassCache[$prefix][$charList])) {
      return $rxClassCache[$prefix][$charList];
    }

    // $parts[0] collects the bracket expression, any further entries are
    // alternatives that cannot be part of a character class.
    $parts = array($prefix);

    foreach (self::str_split($charList) as $grapheme) {
      if ('-' === $grapheme) {
        // A hyphen goes to the very front of the class content.
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($grapheme[2])) {
        // At most two bytes: quote it for use inside a "/"-delimited regex.
        $parts[0] .= preg_quote($grapheme, '/');
      } elseif (1 === self::strlen($grapheme)) {
        // One character of three or more bytes: append as-is.
        $parts[0] .= $grapheme;
      } else {
        // Multi-character element: becomes its own alternative.
        $parts[] = $grapheme;
      }
    }

    $parts[0] = '[' . $parts[0] . ']';

    if (1 === count($parts)) {
      $regex = $parts[0];
    } else {
      $regex = '(?:' . implode('|', $parts) . ')';
    }

    $rxClassCache[$prefix][$charList] = $regex;

    return $regex;
  }
1509 1
1510 1
  /**
1511
   * Convert a string to an array.
1512 1
   *
1513
   * @param string $str
1514 1
   * @param int    $len
1515
   *
1516
   * @return array
1517
   */
1518
  public static function str_split($str, $len = 1)
  {
    // Make sure self::$support has been populated.
    self::checkForSupport();

    $len = (int)$len;
    if ($len < 1) {
      // Invalid chunk length: hand over to the native function.
      return str_split($str, func_get_arg(1));
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $offset = 0;
      $byteLength = strlen($str);

      // grapheme_extract() writes the offset of the next element back
      // into $offset (5th argument, by reference).
      while ($offset < $byteLength) {
        $graphemes[] = grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $offset, $offset);
      }
    } else {
      preg_match_all('/' . GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $graphemes = $matches[0];
    }

    if (1 === $len) {
      return $graphemes;
    }

    // Concatenate every $len consecutive elements into one chunk.
    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
1559
1560
  /**
1561
   * Return the width of a string.
1562
   *
1563
   * @param string $s
1564
   *
1565
   * @return int
1566
   */
1567
  public static function strwidth($s)
  {
    // Make sure the support flags have been initialised.
    self::checkForSupport();

    // The input is always interpreted as UTF-8.
    $width = mb_strwidth($s, 'UTF-8');

    return $width;
  }
1574 1
1575
  /**
1576
   * Limit the number of words in a string.
1577
   *
1578
   * @param  string $str
1579
   * @param  int    $words
1580
   * @param  string $strAddOn
1581
   *
1582
   * @return string
1583
   */
1584
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;

    // Optional leading whitespace, then 1..$words runs of non-whitespace,
    // each followed by its trailing whitespace.
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    // No match: hand the input back unchanged.
    if (!isset($matches[0])) {
      return $str;
    }

    // The match already covers the whole string: nothing was cut off.
    if (self::strlen($str) === self::strlen($matches[0])) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
1604
1605
  /**
1606
   * Limit the number of characters in a string.
1607 1
   *
1608
   * @param  string $str
1609 1
   * @param  int    $length
1610
   * @param  string $strAddOn
1611
   *
1612
   * @return string
1613
   */
1614
  public static function str_limit($str, $length = 100, $strAddOn = '...')
  {
    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // Short enough already: return the input untouched.
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // The character at the limit is a space: cut right before it.
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // Otherwise truncate and drop the last (possibly partial) word.
    $truncated = self::substr($str, 0, $length);
    $pieces = explode(' ', $truncated);
    array_pop($pieces);
    $withoutLastWord = implode(' ', $pieces);

    if ($withoutLastWord === '') {
      // Nothing left after dropping the last word: hard cut instead.
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
1643
1644
  /**
1645
   * Find length of initial segment not matching mask.
1646
   *
1647
   * @param string $str
1648 10
   * @param string $charlist
1649
   * @param int    $start
1650 10
   * @param int    $len
1651 10
   *
1652 10
   * @return int|null
1653
   */
1654 10
  public static function strcspn($str, $charlist, $start = 0, $len = 2147483647)
  {
    // Force the mask to a string; an empty mask yields null.
    $charlist .= '';
    if ('' === $charlist) {
      return null;
    }

    // Only cut out a sub-string when a window was actually requested.
    $subject = ($start || 2147483647 != $len)
        ? (string)self::substr($str, $start, $len)
        : (string)$str;

    // Capture (lazily) everything before the first character of the mask.
    $pattern = '/^(.*?)' . self::rxClass($charlist) . '/us';
    if (preg_match($pattern, $subject, $matches)) {
      return self::strlen($matches[1]);
    }

    // No mask character found: the whole string is the segment.
    return self::strlen($subject);
  }
1673
1674
  /**
1675
   * Checks if the number of Unicode characters in a string are not
1676
   * more than the specified integer.
1677
   *
1678
   * @param    string $str      The original string to be checked.
1679
   * @param    int    $box_size The size in number of chars to be checked against string.
1680
   *
1681
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
1682
   */
1683
  public static function fits_inside($str, $box_size)
  {
    // Compare the character count (not the byte count) against the box.
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1687 19
1688
  /**
1689 19
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
1690
   *
1691 19
   * @param string $str
1692 5
   * @param string $needle
1693
   * @param bool   $before_needle
1694
   *
1695
   * @return false|string
1696 17
   */
1697
  public static function stristr($str, $needle, $before_needle = false)
  {
    // Force the needle to a string; an empty needle yields false.
    $needle .= '';
    if ($needle === '') {
      return false;
    }

    // Make sure the support flags have been initialised.
    self::checkForSupport();

    $found = mb_stristr($str, $needle, $before_needle, 'UTF-8');

    return $found;
  }
1708
1709
  /**
1710 1
   * Case insensitive string comparisons using a "natural order" algorithm.
1711
   *
1712 1
   * @param string $str1
1713
   * @param string $str2
1714 1
   *
1715 1
   * @return int Similar to other string comparison functions, this one returns < 0 if str1 is less than str2; > 0 if
1716
   *             str1 is greater than str2, and 0 if they are equal.
1717
   */
1718 1
  public static function strnatcasecmp($str1, $str2)
  {
    // Case-fold both operands, then run the case-sensitive comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
1722 1
1723 1
  /**
1724 1
   * String comparisons using a "natural order" algorithm.
1725 1
   *
1726
   * @param string $str1
1727 1
   * @param string $str2
1728 1
   *
1729 1
   * @return int Similar to other string comparison functions, this one returns < 0 if str1 is less than str2; > 0 if
1730
   *             str1 is greater than str2, and 0 if they are equal.
1731 1
   */
1732
  public static function strnatcmp($str1, $str2)
  {
    // Identical strings (after string conversion) need no folding at all.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise compare the folded forms with PHP's native strnatcmp().
    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
1736
1737
  /**
1738
   * Generic case sensitive transformation for collation matching.
1739
   *
1740
   * @param string $s
1741 8
   *
1742
   * @return string
1743 8
   */
1744 8
  protected static function strtonatfold($s)
  {
    // Normalize to form NFD first ...
    $decomposed = Normalizer::normalize($s, Normalizer::NFD);

    // ... then remove every run of characters matching \p{Mn}.
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
1748 8
1749 2
  /**
1750
   * Unicode transformation for case-less matching.
1751
   *
1752 8
   * @link http://unicode.org/reports/tr21/tr21-5.html
1753 1
   *
1754 1
   * @param string $str
1755 1
   * @param bool   $full
1756
   *
1757 8
   * @return string
1758
   */
1759
  public static function strtocasefold($str, $full = true)
  {
    // Lookup tables are built / loaded once and reused on later calls.
    static $commonFrom = null;
    static $commonTo = null;
    static $fullTable = null;

    if ($commonFrom === null) {
      $commonFrom = array_keys(self::$commonCaseFold);
      $commonTo = array_values(self::$commonCaseFold);
    }

    // Step 1: the common case-folding replacements.
    $folded = str_replace($commonFrom, $commonTo, $str);

    // Step 2 (optional): the replacements from the "caseFolding_full" data.
    if ($full) {
      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    // Step 3: lowercase the result.
    return self::strtolower($folded);
  }
1784
1785
  /**
1786
   * (PHP 4 &gt;= 4.3.0, PHP 5)<br/>
1787
   * Make a string lowercase.
1788
   *
1789
   * @link http://php.net/manual/en/function.mb-strtolower.php
1790
   *
1791
   * @param string $str <p>
1792
   *                    The string being lowercased.
1793
   *                    </p>
1794
   * @param string $encoding
1795
   *
1796
   * @return string str with all alphabetic characters converted to lowercase.
1797
   */
1798
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // Make sure the support flags have been initialised.
    self::checkForSupport();

    $lowered = mb_strtolower($str, $encoding);

    return $lowered;
  }
1811
1812
  /**
1813
   * Returns a case swapped version of the string.
1814
   *
1815
   * @param string $str
1816
   * @param string $encoding
1817
   *
1818
   * @return string each character's case swapped
1819
   */
1820
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $cleaned = self::clean($str);

    // Flip one character: if upper-casing does not change it, lower-case it;
    // otherwise use the upper-cased form.
    $swapOne = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = self::strtoupper($char, $encoding);

      return ($char == $upper) ? self::strtolower($char, $encoding) : $upper;
    };

    // Apply the flip to every single non-whitespace character.
    return preg_replace_callback('/[\S]/u', $swapOne, $cleaned);
  }
1846
1847
  /**
1848 2
   * Multi decode html entity & fix urlencoded-win1252-chars.
1849
   *
1850
   * e.g:
1851 14
   * 'D&#252;sseldorf'               => 'Düsseldorf'
1852
   * 'D%FCsseldorf'                  => 'Düsseldorf'
1853
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
1854 14
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
1855 14
   * 'Düsseldorf'                   => 'Düsseldorf'
1856 14
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
1857
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
1858 14
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
1859 14
   *
1860
   * @param string $str
1861 14
   *
1862
   * @return string
1863
   */
1864
  public static function urldecode($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // Plain URL-decoding first, then turn "%uXXXX" escapes into hexadecimal
    // HTML entities so the entity decoder below can pick them up.
    $urlDecoded = urldecode($str);
    $withEntities = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $urlDecoded);

    // ENT_HTML5 is only used when Bootup::is_php('5.4') says so.
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // Pipeline: to UTF-8 -> decode entities -> raw URL-decode -> repair
    // broken UTF-8 sequences.
    $asUtf8 = self::to_utf8($withEntities);
    $entitiesDecoded = self::html_entity_decode($asUtf8, $flags);
    $rawDecoded = rawurldecode($entitiesDecoded);

    return (string)self::fix_simple_utf8($rawDecoded);
  }
1887
1888
  /**
1889
   * Fixing a broken UTF-8 string.
1890 20
   *
1891
   * @param string $str
1892 20
   *
1893 2
   * @return string
1894
   */
1895 2
  public static function fix_simple_utf8($str)
  {
    // Search / replace lists, derived once from self::$brokenUtf8ToUtf8.
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
      $fixedSequences = array_values(self::$brokenUtf8ToUtf8);
    }

    $fixed = str_replace($brokenSequences, $fixedSequences, $str);

    return $fixed;
  }
1913 20
1914 20
  /**
1915 20
   * UTF-8 version of html_entity_decode()
1916 20
   *
1917
   * The reason we are not using html_entity_decode() by itself is because
1918 20
   * while it is not technically correct to leave out the semicolon
1919
   * at the end of an entity most browsers will still interpret the entity
1920 18
   * correctly. html_entity_decode() does not convert entities without
1921 17
   * semicolons, so we are left with our own little solution here. Bummer.
1922 17
   *
1923 17
   * Convert all HTML entities to their applicable characters
1924 5
   *
1925 5
   * @link http://php.net/manual/en/function.html-entity-decode.php
1926 5
   *
1927
   * @param string $str   <p>
1928
   *                         The input string.
1929 20
   *                         </p>
1930
   * @param int    $flags    [optional] <p>
1931 18
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1932 14
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1933 14
   *                         <table>
1934 14
   *                         Available <i>flags</i> constants
1935 8
   *                         <tr valign="top">
1936 8
   *                         <td>Constant Name</td>
1937 8
   *                         <td>Description</td>
1938
   *                         </tr>
1939
   *                         <tr valign="top">
1940 19
   *                         <td><b>ENT_COMPAT</b></td>
1941
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1942 7
   *                         </tr>
1943 1
   *                         <tr valign="top">
1944 1
   *                         <td><b>ENT_QUOTES</b></td>
1945 1
   *                         <td>Will convert both double and single quotes.</td>
1946 6
   *                         </tr>
1947 6
   *                         <tr valign="top">
1948 6
   *                         <td><b>ENT_NOQUOTES</b></td>
1949
   *                         <td>Will leave both double and single quotes unconverted.</td>
1950
   *                         </tr>
1951 7
   *                         <tr valign="top">
1952 6
   *                         <td><b>ENT_HTML401</b></td>
1953 6
   *                         <td>
1954 6
   *                         Handle code as HTML 4.01.
1955
   *                         </td>
1956
   *                         </tr>
1957 20
   *                         <tr valign="top">
1958
   *                         <td><b>ENT_XML1</b></td>
1959 2
   *                         <td>
1960 2
   *                         Handle code as XML 1.
1961
   *                         </td>
1962
   *                         </tr>
1963 2
   *                         <tr valign="top">
1964 2
   *                         <td><b>ENT_XHTML</b></td>
1965 2
   *                         <td>
1966
   *                         Handle code as XHTML.
1967
   *                         </td>
1968 2
   *                         </tr>
1969 18
   *                         <tr valign="top">
1970
   *                         <td><b>ENT_HTML5</b></td>
1971 20
   *                         <td>
1972
   *                         Handle code as HTML 5.
1973 20
   *                         </td>
1974
   *                         </tr>
1975
   *                         </table>
1976 20
   *                         </p>
1977 20
   * @param string $encoding [optional] <p>
1978
   *                         Encoding to use.
1979 3
   *                         </p>
1980 20
   *
1981
   * @return string the decoded string.
1982 20
   */
1983
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // Without an ampersand there cannot be any entity to decode.
    if (strpos($str, '&') === false) {
      return $str;
    }

    // Default flags: ENT_HTML5 is added when Bootup::is_php('5.4') is true.
    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? ENT_COMPAT | ENT_HTML5 : ENT_COMPAT;
    }

    // Numeric entities (hex or decimal) that lack their closing semicolon.
    $unterminated = '/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS';

    // Repeat until a pass changes nothing, so nested (multiply encoded)
    // entities are fully resolved.
    do {
      $previous = $str;

      // Append the missing ";" first, then let PHP decode the entities.
      $terminated = preg_replace($unterminated, '$1;', $previous);
      $str = html_entity_decode($terminated, $flags, $encoding);
    } while ($previous !== $str);

    return $str;
  }
2016
2017
  /**
2018
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
2019
   *
2020
   * - It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
2021
   *
2022
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
2023
   *
2024
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
2025
   *    are followed by any of these:  ("group B")
2026
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
2027
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
2028
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
2029
   * is also a valid unicode character, and will be left unchanged.
2030
   *
2031
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
2032
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
2033
   *
2034
   * @param string|array $str Any string or array.
2035
   *
2036
   * @return string|array The same string (or array of strings), but UTF8 encoded.
2037
   */
2038
  public static function to_utf8($str)
  {
    // Arrays are converted element by element (recursively) and returned
    // as an array again.
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    // Length in bytes ("8bit" encoding).
    $max = self::strlen($str, '8bit');

    $buf = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 0x00-0x7F: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      if ($ordC1 >= 0xC0) {
        // Possible UTF-8 lead byte: number of continuation bytes it announces
        // (0xF8-0xFF never start a sequence that is kept).
        if ($ordC1 <= 0xDF) {
          $tail = 1;
        } elseif ($ordC1 <= 0xEF) {
          $tail = 2;
        } elseif ($ordC1 <= 0xF7) {
          $tail = 3;
        } else {
          $tail = 0;
        }

        // Every announced byte must be a continuation byte (0x80-0xBF);
        // bytes past the end of the string count as "\x00".
        $sequence = $c1;
        $isUtf8 = ($tail > 0);
        for ($k = 1; $isUtf8 && $k <= $tail; $k++) {
          $next = ($i + $k < $max) ? $str[$i + $k] : "\x00";
          $isUtf8 = ((ord($next) & 0xC0) === 0x80);
          $sequence .= $next;
        }

        if ($isUtf8) {
          // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $tail;
          continue;
        }
      } elseif (isset(self::$win1252ToUtf8[$ordC1])) {
        // 0x80-0xBF found in the Windows-1252 special cases table
        $buf .= self::$win1252ToUtf8[$ordC1];
        continue;
      }

      // Not valid UTF-8: encode this single byte as a two-byte UTF-8 sequence.
      // Integer shift/mask instead of "chr(ord($c1) / 64)", which hands a
      // fractional float to chr() (deprecated implicit conversion in PHP 8.1).
      $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
    }

    self::checkForSupport();

    // decode unicode escape sequences ("\uXXXX")
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode decimal numeric entities with 2-4 digits ("&#NNNN;")
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
2144 2
2145 2
  /**
2146
   * alias for "UTF8::to_utf8"
2147 2
   *
2148 2
   * @param string $str
2149
   *
2150
   * @return string
2151
   */
2152 2
  public static function toUTF8($str)
  {
    // Pure alias: delegate to to_utf8() and pass its result through.
    $converted = self::to_utf8($str);

    return $converted;
  }
2156 2
2157
  /**
2158 2
   * Try to check if a string is a json-string...
2159 1
   *
2160 1
   * @param $str
2161 2
   *
2162
   * @return bool
2163
   *
2164
   * @deprecated
2165 2
   */
2166
  public static function isJson($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return false;
    }

    // Only input that decodes to an object (with the default json_decode()
    // settings) and leaves no JSON error behind counts as JSON here.
    $decoded = json_decode($str);

    return is_object($decoded) && json_last_error() == JSON_ERROR_NONE;
  }
2184
2185
  /**
2186
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
2187
   *
2188
   * @link http://php.net/manual/en/function.grapheme-strstr.php
2189
   *
2190
   * @param string $haystack      <p>
2191 2
   *                              The input string. Must be valid UTF-8.
2192
   *                              </p>
2193
   * @param string $needle        <p>
2194 2
   *                              The string to look for. Must be valid UTF-8.
2195
   *                              </p>
2196
   * @param bool   $before_needle [optional] <p>
2197
   *                              If <b>TRUE</b>, grapheme_strstr() returns the part of the
2198 2
   *                              haystack before the first occurrence of the needle (excluding the needle).
2199
   *                              </p>
2200
   *
2201
   * @return string|false the portion of string, or FALSE if needle is not found.
2202
   */
2203
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // Make sure the support flags have been initialised.
    self::checkForSupport();

    // Delegates to grapheme_strstr() and passes its result through unchanged.
    $portion = grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
2209
2210 2
  /**
2211
   * Reads entire file into a string.
2212
   *
2213
   * WARNING: do not use UTF-8 Option for binary-files (e.g.: images) !!!
2214
   *
2215
   * @link http://php.net/manual/en/function.file-get-contents.php
2216 2
   *
2217
   * @param string   $filename      <p>
2218
   *                                Name of the file to read.
2219
   *                                </p>
2220
   * @param int      $flags         [optional] <p>
2221
   *                                Prior to PHP 6, this parameter is called
2222 2
   *                                use_include_path and is a bool.
2223
   *                                As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
2224
   *                                to trigger include path
2225
   *                                search.
2226 2
   *                                </p>
2227 2
   *                                <p>
2228
   *                                The value of flags can be any combination of
2229
   *                                the following flags (with some restrictions), joined with the
2230
   *                                binary OR (|)
2231 2
   *                                operator.
2232 2
   *                                </p>
2233 2
   *                                <p>
2234 2
   *                                <table>
2235 2
   *                                Available flags
2236 2
   *                                <tr valign="top">
2237
   *                                <td>Flag</td>
2238 2
   *                                <td>Description</td>
2239 1
   *                                </tr>
2240 1
   *                                <tr valign="top">
2241 1
   *                                <td>
2242 1
   *                                FILE_USE_INCLUDE_PATH
2243 1
   *                                </td>
2244
   *                                <td>
2245 1
   *                                Search for filename in the include directory.
2246
   *                                See include_path for more
2247
   *                                information.
2248 1
   *                                </td>
2249
   *                                </tr>
2250 2
   *                                <tr valign="top">
2251
   *                                <td>
2252
   *                                FILE_TEXT
2253
   *                                </td>
2254 2
   *                                <td>
2255
   *                                As of PHP 6, the default encoding of the read
2256
   *                                data is UTF-8. You can specify a different encoding by creating a
2257
   *                                custom context or by changing the default using
2258
   *                                stream_default_encoding. This flag cannot be
2259
   *                                used with FILE_BINARY.
2260
   *                                </td>
2261
   *                                </tr>
2262
   *                                <tr valign="top">
2263
   *                                <td>
2264 3
   *                                FILE_BINARY
2265
   *                                </td>
2266
   *                                <td>
2267 3
   *                                With this flag, the file is read in binary mode. This is the default
2268
   *                                setting and cannot be used with FILE_TEXT.
2269
   *                                </td>
2270 3
   *                                </tr>
2271
   *                                </table>
2272 3
   *                                </p>
2273 3
   * @param resource $context       [optional] <p>
2274 3
   *                                A valid context resource created with
2275 3
   *                                stream_context_create. If you don't need to use a
2276 2
   *                                custom context, you can skip this parameter by &null;.
2277
   *                                </p>
2278 3
   * @param int      $offset        [optional] <p>
2279
   *                                The offset where the reading starts.
2280
   *                                </p>
2281
   * @param int      $maxlen        [optional] <p>
2282
   *                                Maximum length of data read. The default is to read until end
2283
   *                                of file is reached.
2284
   *                                </p>
2285
   * @param int      $timeout
2286
   *
2287
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non
2288
   *                                default utf-8 chars
2289 1
   *
2290
   * @return string The function returns the read data or false on failure.
2291 1
   */
2292 1
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
2293
  {
2294 1
    // init
2295 1
    $timeout = (int)$timeout;
2296 1
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);
2297 1
2298 1
    if ($timeout && $context === null) {
2299 1
      $context = stream_context_create(
2300 1
          array(
2301 1
              'http' =>
2302 1
                  array(
2303 1
                      'timeout' => $timeout,
2304 1
                  ),
2305 1
          )
2306 1
      );
2307 1
    }
2308
2309 1
    if (is_int($maxlen)) {
2310 1
      $data = file_get_contents($filename, $flags, $context, $offset, $maxlen);
2311 1
    } else {
2312 1
      $data = file_get_contents($filename, $flags, $context, $offset);
2313 1
    }
2314 1
2315 1
    // return false on error
2316 1
    if ($data === false) {
2317 1
      return false;
2318 1
    }
2319 1
2320 1
    if ($convertToUtf8 === true) {
2321 1
      self::checkForSupport();
2322 1
2323
      $encoding = self::str_detect_encoding($data);
2324 1
      if ($encoding != 'UTF-8') {
2325 1
        $data = mb_convert_encoding($data, 'UTF-8', $encoding);
2326 1
      }
2327
2328 1
      $data = self::cleanup($data);
2329
    }
2330
2331
    // clean utf-8 string
2332 1
    return $data;
2333
  }
2334 1
2335
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection order:
   *  1. byte-order marks (UTF-8, UTF-32BE/LE, UTF-16BE/LE),
   *  2. for input that is_binary() flags as binary: the is_utf16() / is_utf32() heuristics,
   *  3. mb_detect_encoding() in strict mode with the order UTF-8, windows-1251, ISO-8859-1.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    // Byte-order marks. The UTF-32LE mark (FF FE 00 00) starts with the
    // UTF-16LE mark (FF FE), so the 4-byte marks must be tested first.
    $byteOrderMarks = array(
        "\xEF\xBB\xBF"     => 'UTF-8',
        "\x00\x00\xFE\xFF" => 'UTF-32BE',
        "\xFF\xFE\x00\x00" => 'UTF-32LE',
        "\xFE\xFF"         => 'UTF-16BE',
        "\xFF\xFE"         => 'UTF-16LE',
    );

    foreach ($byteOrderMarks as $bom => $bomEncoding) {
      if (substr($str, 0, strlen($bom)) === $bom) {
        return $bomEncoding;
      }
    }

    // No BOM: try the UTF-16 / UTF-32 heuristics, but only for binary-looking input.
    // Each heuristic is evaluated once and its result reused.
    if (self::is_binary($str)) {
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    self::checkForSupport();

    // For UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always.
    $detectOrder = array(
        'UTF-8',
        'windows-1251',
        'ISO-8859-1',
    );
    $encoding = mb_detect_encoding($str, $detectOrder, true);

    // Normalise every "nothing detected" result to false.
    if (!$encoding) {
      return false;
    }

    return $encoding;
  }
2413
2414
  /**
   * Check if the input is binary... (it looks like a hack)
   *
   * The input counts as binary when it
   *  - consists only of the characters "0" and "1" (a textual bit-string), or
   *  - contains at least one NUL byte.
   *
   * NOTE: an earlier third heuristic compared substr_count($input, '^ -~') / strlen($input)
   * against 0.3. It counted the literal 4-character string '^ -~', so the ratio could
   * never exceed 0.25 and the clause was never true. It was removed without any change
   * in behaviour.
   * NOTE(review): '^ -~' looks like the body of a "non-printable ASCII" regex class;
   * activating such a check would change which inputs are reported as binary, so it
   * was deliberately not "fixed" here.
   *
   * @param string $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    $input = (string)$input;

    // textual bit-string, e.g. "010011"
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // any NUL byte marks the input as binary
    return substr_count($input, "\x00") > 0;
  }
2438
2439
  /**
   * Check if the string is UTF-16.
   *
   * Only input that is_binary() reports as binary is examined. For each byte order
   * the input is decoded to UTF-8 and must survive a full re-encode / decode round-trip.
   * The score of a byte order is the number of distinct decoded characters that also
   * occur as characters in the raw input. The byte order with the higher score wins;
   * a tie means "not UTF-16".
   *
   * @param string $str
   *
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // candidate encoding => return code
    $candidates = array(
        'UTF-16LE' => 1,
        'UTF-16BE' => 2,
    );

    $scores = array();
    $strChars = null; // characters of the raw input, computed lazily

    foreach ($candidates as $encoding => $code) {
      $scores[$code] = 0;

      $test = mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($test === false || strlen($test) <= 1) {
        continue;
      }

      // the decoded text must survive a full round-trip unchanged
      $test2 = mb_convert_encoding($test, $encoding, 'UTF-8');
      $test3 = mb_convert_encoding($test2, 'UTF-8', $encoding);
      if ($test3 !== $test) {
        continue;
      }

      if ($strChars === null) {
        $strChars = self::count_chars($str);
      }

      // count_chars() returns "character => count", so the characters are the KEYS.
      // Searching the values (the counts) with in_array() would almost never match.
      foreach (self::count_chars($test3) as $test3char => $test3charCount) {
        if (isset($strChars[$test3char])) {
          $scores[$code]++;
        }
      }
    }

    if ($scores[1] === $scores[2]) {
      return false;
    }

    return $scores[1] > $scores[2] ? 1 : 2;
  }
2493
2494
  /**
   * Counts how often each character occurs in a string.
   *
   * Unlike the native count_chars() there is no $mode parameter.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array with the characters as keys and
   *           their number of occurrences as values, sorted by key.
   */
  public static function count_chars($str)
  {
    // split the input into its characters first ...
    $characters = self::split($str);

    // ... then tally them and order the tally by character
    $occurrences = array_count_values($characters);
    ksort($occurrences);

    return $occurrences;
  }
2510
2511
  /**
   * Check if the string is UTF-32.
   *
   * Only input that is_binary() reports as binary is examined. For each byte order
   * the input is decoded to UTF-8 and must survive a full re-encode / decode round-trip.
   * The score of a byte order is the number of distinct decoded characters that also
   * occur as characters in the raw input. The byte order with the higher score wins;
   * a tie means "not UTF-32".
   *
   * @param string $str
   *
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // candidate encoding => return code
    $candidates = array(
        'UTF-32LE' => 1,
        'UTF-32BE' => 2,
    );

    $scores = array();
    $strChars = null; // characters of the raw input, computed lazily

    foreach ($candidates as $encoding => $code) {
      $scores[$code] = 0;

      $test = mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($test === false || strlen($test) <= 1) {
        continue;
      }

      // the decoded text must survive a full round-trip unchanged
      $test2 = mb_convert_encoding($test, $encoding, 'UTF-8');
      $test3 = mb_convert_encoding($test2, 'UTF-8', $encoding);
      if ($test3 !== $test) {
        continue;
      }

      if ($strChars === null) {
        $strChars = self::count_chars($str);
      }

      // count_chars() returns "character => count", so the characters are the KEYS.
      // Searching the values (the counts) with in_array() would almost never match.
      foreach (self::count_chars($test3) as $test3char => $test3charCount) {
        if (isset($strChars[$test3char])) {
          $scores[$code]++;
        }
      }
    }

    if ($scores[1] === $scores[2]) {
      return false;
    }

    return $scores[1] > $scores[2] ? 1 : 2;
  }
2565
2566 1
  /**
   * Clean-up a string so that only printable UTF-8 chars remain at the end.
   *
   * The input is cast to string, passed through fix_simple_utf8() and then
   * through clean(); an empty input short-circuits to an empty string.
   *
   * @param string|false $str
   *
   * @return string
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    // nothing to do for an empty string
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // What this clean() call is meant to do, according to the original author's notes:
    //  - remove all non UTF-8 symbols
    //  - remove the diamond question mark (�)
    //  - remove invisible characters (e.g. "\0")
    //  - remove the BOM
    //  - normalize whitespace chars (but keep non-breaking-spaces)
    // NOTE(review): the mapping of the four boolean flags to these steps is not
    // visible here - confirm against the signature of clean().
    $cleaned = self::clean($fixed, true, true, false, true);

    return (string)$cleaned;
  }
2596 6
2597 6
  /**
   * Check if the file is binary.
   *
   * Reads at most the first 512 bytes of the file and applies is_binary() to them.
   * A file that cannot be opened or read is checked as an empty string.
   *
   * @param string $file Path of the file to inspect.
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      // binary-safe read mode
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing,
      // so the handle must be checked before it is used.
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        fclose($fp);
        $fp = false;

        if (is_string($chunk)) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Reached e.g. when an error handler turns warnings into exceptions.
      // Make sure the handle is not leaked in that case.
      if ($fp !== false) {
        fclose($fp);
      }
      $block = '';
    }

    return self::is_binary($block);
  }
2616 11
2617
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around mb_strrchr().
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack from the beginning
   *                         to the last occurrence of needle.
   *                         If set to false, it returns all of haystack from the last
   *                         occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to UTF-8.
   *                         </p>
   *
   * @return string|false the portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // shared support check first, then delegate to mbstring
    self::checkForSupport();

    $portion = mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
2651 11
2652
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around mb_strrichr().
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack from the beginning
   *                         to the last occurrence of needle.
   *                         If set to false, it returns all of haystack from the last
   *                         occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to UTF-8.
   *                         </p>
   *
   * @return string|false the portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // shared support check first, then delegate to mbstring
    self::checkForSupport();

    $portion = mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
2686 1
2687 2
  /**
   * "filter_var()"-wrapper which passes the filtered value through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * The $option argument is only forwarded to the native filter_var() when the
   * caller actually supplied it.
   *
   * @param mixed $var
   * @param int   $filter
   * @param mixed $option
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // distinguish "option omitted" from "option explicitly passed"
    $optionGiven = func_num_args() >= 3;

    $filtered = $optionGiven
        ? filter_var($var, $filter, $option)
        : filter_var($var, $filter);

    return self::filter($filtered);
  }
2706 6
2707
  /**
   * Normalizes to UTF-8 NFC, converting from CP-1252 when needed.
   *
   * Arrays and objects are walked recursively (each element / iterable property
   * is replaced by its filtered value). Strings get their line endings unified
   * to "\n" and, if they contain bytes >= 0x80, are normalized. Every other
   * type is returned untouched.
   *
   * @param mixed  $var
   * @param int    $normalization_form Passed on to Normalizer::isNormalized() / Normalizer::normalize().
   *                                   NOTE(review): the default 4 is Normalizer::NFC only in the
   *                                   pre-PHP-7.3 intl numbering - confirm which Normalizer class
   *                                   is in scope here.
   * @param string $leading_combining  Prefix put in front of a string that starts with a
   *                                   combining mark (\p{Mn}).
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // strings without any high byte need no normalization at all
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker: "the string is usable as it is"
      $normalized = '-';
    } else {
      $normalized = Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // normalization yielded nothing usable -> fall back to the UTF-8 encoder
        $var = self::encode('UTF-8', $var);
      }
    }

    // Prevent leading combining chars
    // for NFC-safe concatenations.
    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
2759
2760
  /**
   * Encode to UTF8 or LATIN1.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function on a UTF-8 string as well without messing it up.
   *
   * The label is resolved via normalizeEncoding() first; "UTF-8" is handled by
   * to_utf8() and "ISO-8859-1" by to_latin1().
   *
   * @param string $encodingLabel ISO-8859-1 || UTF-8
   * @param string $str
   *
   * @return false|string Will return false on error (i.e. for any other resolved label).
   */
  public static function encode($encodingLabel, $str)
  {
    switch (self::normalizeEncoding($encodingLabel)) {
      case 'UTF-8':
        return self::to_utf8($str);

      case 'ISO-8859-1':
        return self::to_latin1($str);

      default:
        return false;
    }
  }
2785
2786
  /**
   * Normalize the encoding-name input.
   *
   * The label is upper-cased and stripped of everything except letters, digits
   * and whitespace. The known Latin-1 / Windows-1252 spellings resolve to
   * "ISO-8859-1"; every other label (including "UTF8" and "UTF") resolves to "UTF-8".
   *
   * @param string $encodingLabel e.g.: ISO, UTF8, ISO88591, WIN1252, etc.
   *
   * @return string "ISO-8859-1" or "UTF-8"
   */
  protected static function normalizeEncoding($encodingLabel)
  {
    $label = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));

    // all spellings that are treated as ISO-8859-1
    $latin1Labels = array(
        'ISO88591',
        'ISO8859',
        'ISO',
        'LATIN1',
        'LATIN',
        'WIN1252',
        'WINDOWS1252',
    );

    if (in_array($label, $latin1Labels, true)) {
      return 'ISO-8859-1';
    }

    // "UTF8", "UTF" and every unknown label
    return 'UTF-8';
  }
2817
2818
  /**
   * Alias for "UTF8::to_win1252()".
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
2829
2830
  /**
   * Convert a string into win1252.
   *
   * Strings are converted with utf8_decode(), arrays are converted element by
   * element (recursively, keys are kept), anything else is returned unchanged.
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  protected static function to_win1252($str)
  {
    if (is_string($str)) {
      return self::utf8_decode($str);
    }

    if (!is_array($str)) {
      return $str;
    }

    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      $str[$key] = self::to_win1252($value);
    }

    return $str;
  }
2853
2854
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through to_utf8(), then the sequences listed in
   * self::$utf8ToWin1252 are replaced, and the result is handed to Xml::utf8_decode().
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // replacement tables, built on first use and kept for later calls
    static $utf8ToWin1252Keys = null;
    static $utf8ToWin1252Values = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (null === $utf8ToWin1252Keys) {
      $utf8ToWin1252Keys = array_keys(self::$utf8ToWin1252);
      $utf8ToWin1252Values = array_values(self::$utf8ToWin1252);
    }

    $utf8 = self::to_utf8($str);
    $mapped = str_replace($utf8ToWin1252Keys, $utf8ToWin1252Values, $utf8);

    return Xml::utf8_decode($mapped);
  }
2884
2885
  /**
   * "filter_input()"-wrapper which passes the filtered value through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * The $option argument is only forwarded to the native filter_input() when the
   * caller actually supplied it.
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param mixed  $option
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // distinguish "option omitted" from "option explicitly passed"
    $optionGiven = func_num_args() >= 4;

    $filtered = $optionGiven
        ? filter_input($type, $var, $filter, $option)
        : filter_input($type, $var, $filter);

    return self::filter($filtered);
  }
2905
2906
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native utf8_encode() ran, the sequences listed in
   * self::$cp1252ToUtf8 are replaced - but only if the result contains
   * a "\xC2" byte at all.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // replacement tables, built on first use and kept for later calls
    static $cp1252ToUtf8Keys = null;
    static $cp1252ToUtf8Values = null;

    $encoded = utf8_encode($str);

    // without a "\xC2" byte there is nothing to replace
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if (null === $cp1252ToUtf8Keys) {
      $cp1252ToUtf8Keys = array_keys(self::$cp1252ToUtf8);
      $cp1252ToUtf8Values = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($cp1252ToUtf8Keys, $cp1252ToUtf8Values, $encoded);
  }
2932
2933
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value.
   *
   * The value is passed through UTF8::filter() before it is encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource.
   *                       </p>
   *                       <p>
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   *                       <p>PHP implements a superset of
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
   *                       only supports these values when they are nested inside an array or an object.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
   *                       constants is described on the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only forwarded when Bootup::is_php('5.5') reports true.
   *                       </p>
   *
   * @return string|false a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // NOTE(review): presumably is_php('5.5') means "running PHP >= 5.5",
    // the first version whose json_encode() accepts $depth - confirm in Bootup.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
2981
2982 2
  /**
   * Wrapper around PHP's native "json_decode()" that first runs the input through "UTF8::filter()".
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The JSON string being decoded (UTF-8 encoded).
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options (e.g. <b>JSON_BIGINT_AS_STRING</b>). It is only
   *                        forwarded to the native function when "Bootup::is_php('5.4')" returns true.
   *                        </p>
   *
   * @return mixed The decoded value in the appropriate PHP type; <b>NULL</b> is returned if the input
   *               cannot be decoded or if it is nested deeper than the recursion limit.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    // older PHP versions do not know the fourth ($options) parameter
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3030
3031
  /**
   * "filter_var_array()"-wrapper which passes the result through "UTF8::filter()"
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * The optional arguments are only forwarded to the native function when the caller
   * actually supplied at least two arguments.
   *
   * @param array $data
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    $filtered = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($filtered);
  }
3050 1
3051 1
  /**
   * "filter_input_array()"-wrapper which passes the result through "UTF8::filter()"
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * The optional arguments are only forwarded to the native function when the caller
   * actually supplied at least two arguments.
   *
   * @param int   $type
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    $filtered = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($filtered);
  }
3070 1
3071
  /**
   * Search a string for any of a set of characters.
   *
   * @param string $s        The string to search in.
   * @param string $charList The characters to look for.
   *
   * @return string|false The part of $s starting at the first matched character, or false if nothing matched.
   */
  public static function strpbrk($s, $charList)
  {
    $pattern = '/' . self::rxClass($charList) . '/us';

    if (!preg_match($pattern, $s, $m)) {
      return false;
    }

    // byte offset of the first match, so the byte-wise "substr()" stays consistent
    $bytePos = strpos($s, $m[0]);

    return substr($s, $bytePos);
  }
3087
3088
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are case-folded via "UTF8::strtocasefold()" and then compared with "UTF8::strncmp()".
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
3101
3102
  /**
   * Comparison of the first n characters.
   *
   * Both strings are cut with "UTF8::substr()" and the results are compared with "UTF8::strcmp()".
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
3117
3118 1
  /**
   * String comparison.
   *
   * Identical strings short-circuit to 0; otherwise both sides are normalized
   * to NFD before the byte-wise native "strcmp()" is applied.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $normalized1 = Normalizer::normalize($str1, Normalizer::NFD);
    $normalized2 = Normalizer::normalize($str2, Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
3135
3136
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest byte length found, or 0 if "UTF8::chr_size_list()" yields no characters.
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    if (count($sizes) === 0) {
      return 0;
    }

    return (int)max($sizes);
  }
3153
3154
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character (empty array for an empty string).
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // explicit emptiness check: a plain "!$str" would also reject the valid string "0"
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
3174
3175 4
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   *
   * @return   string The HTML numbered entity, or an empty string for empty input.
   */
  public static function single_chr_html_encode($chr)
  {
    $chr = (string)$chr;

    // explicit emptiness check: a plain "!$chr" would silently drop the character "0"
    if (!isset($chr[0])) {
      return '';
    }

    return '&#' . self::ord($chr) . ';';
  }
3190
3191 4
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * Only the first (up to) four bytes of the input are inspected.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for empty input.
   */
  public static function ord($s)
  {
    $s = (string)$s;

    // explicit emptiness check: a plain "!$s" would return 0 for the character "0" (U+0030)
    if (!isset($s[0])) {
      return 0;
    }

    // 1-based array of the first (max.) four byte values
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // 4-byte sequence
    if (0xF0 <= $lead && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // 3-byte sequence
    if (0xE0 <= $lead && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // 2-byte sequence
    if (0xC0 <= $lead && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte (or a lead byte without enough continuation bytes)
    return $lead;
  }
3222
3223
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * e.g.: &#123;&#39;&#1740;
   *
   * Every character returned by "UTF8::split()" is encoded via "UTF8::single_chr_html_encode()".
   *
   * @param  string $str The Unicode string to be encoded as numbered entities.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str)
  {
    $encoder = array(
        '\\voku\\helper\\UTF8',
        'single_chr_html_encode',
    );

    $entities = array_map($encoder, self::split($str));

    return implode($entities);
  }
3244
3245 11
  /**
   * Checks if a file starts with BOM character.
   *
   * Only the first three bytes of the file are read.
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (also False if the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    // Read from offset 0: since PHP 7.1 a negative offset counts from the END of the stream,
    // so the former "-1" would inspect the last byte instead of the first three.
    $head = file_get_contents($file_path, false, null, 0, 3);

    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
3256
3257
  /**
   * Checks if the given string is exactly the "UTF-8 Byte Order Mark".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you want to check whether a longer string starts with a BOM.
   *
   * @param    string $utf8_chr The input string.
   *
   * @return   bool True if $utf8_chr is identical to the value of "UTF8::bom()", False otherwise.
   */
  public static function is_bom($utf8_chr)
  {
    $bom = self::bom();

    return $bom === $utf8_chr;
  }
3270 10
3271
  /**
   * Returns the UTF-8 Byte Order Mark.
   *
   * @return   string The three bytes 0xEF 0xBB 0xBF.
   */
  public static function bom()
  {
    $mark = "\xEF\xBB\xBF";

    return $mark;
  }
3280
3281
  /**
   * Alias for "UTF8::is_bom()".
   *
   * @param string $utf8_chr The input string.
   *
   * @return boolean The result of "UTF8::is_bom()" for the given value.
   */
  public static function isBom($utf8_chr)
  {
    $result = self::is_bom($utf8_chr);

    return $result;
  }
3292
3293
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    // the BOM is three bytes long, so compare the first three bytes only
    $prefix = substr($str, 0, 3);

    return self::is_bom($prefix);
  }
3304
3305
  /**
   * Prepends BOM character to the string and returns the whole string.
   *
   * INFO: If the string already starts with a BOM, the input string is returned unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that contains BOM
   */
  public static function add_bom_to_string($str)
  {
    $prefix = substr($str, 0, 3);

    if (self::is_bom($prefix)) {
      return $str;
    }

    return self::bom() . $str;
  }
3322
3323
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split into characters via "UTF8::split()", so multi-byte
   * characters are moved as a whole.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
3338
3339
  /**
   * Wraps a string to a given number of characters.
   *
   * The native "wordwrap()" counts bytes, so the text is first translated into a
   * one-byte-per-character mask ("?" = any character, " " = space, "#" = break),
   * the mask is wrapped natively and the result is mapped back onto the real characters.
   *
   * @param string $str
   * @param int    $width
   * @param string $break
   * @param bool   $cut
   *
   * @return false|string Returns the given string wrapped at the specified length,
   *                      or false if the native "wordwrap()" rejects the arguments.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    // let the native function validate $width / $break / $cut
    if (false === wordwrap('-', $width, $break, $cut)) {
      return false;
    }

    // always work with a string (the former "is_string()" guard made this cast a no-op)
    $break = (string)$break;

    $mask = '';
    $lines = explode($break, $str);
    $lineCount = count($lines);
    $chars = array();

    if (1 === $lineCount && '' === $lines[0]) {
      return '';
    }

    // build the list of real characters and the parallel single-byte mask
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $lineCount; ++$i) {

      if ($i) {
        // an already existing break between two lines
        $chars[] = $break;
        $mask .= '#';
      }

      $line = $lines[$i];
      unset($lines[$i]);

      foreach (self::split($line) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $result = '';
    $j = 0;
    $b = $i = -1;
    $mask = wordwrap($mask, $width, '#', $cut);

    // walk over every break position in the wrapped mask
    while (false !== $b = self::strpos($mask, '#', $b + 1)) {
      // copy the characters in front of the break
      for (++$i; $i < $b; ++$i) {
        $result .= $chars[$j];
        unset($chars[$j++]);
      }

      // a break that replaced a space / an existing break consumes that character,
      // a break inserted by "$cut" does not
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
        unset($chars[$j++]);
      }

      $result .= $break;
    }

    // append whatever is left after the last break
    return $result . implode('', $chars);
  }
3405
3406
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack     <p>
   *                              The string being checked.
   *                              </p>
   * @param string  $needle       <p>
   *                              The string to find in haystack. The value is always cast to string,
   *                              so an integer is searched as its decimal representation.
   *                              </p>
   * @param int     $offset       [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string  $encoding     [optional] Character encoding, "UTF-8" by default
   *                              (a boolean is accepted for old callers and replaced by "UTF-8").
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    if ($cleanUtf8 === true) {
      // mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['iconv'] === true) {
      // use the iconv function that belongs to the checked extension
      // (the former "grapheme_strpos()" call is part of "intl", not "iconv")
      return iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // pure PHP fallback: search byte-wise and translate the byte offset into a character offset
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    $bytePos = strpos($haystack, $needle);
    if ($bytePos !== false) {
      $left = substr($haystack, 0, $bytePos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
3480
3481
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * A real integer is used as-is; every other value is interpreted
   * via "UTF8::hex_to_int()" (e.g. "U+0041").
   *
   * @param    int $code_point The code point for which to generate a character.
   *
   * @return   string Multi-Byte character, returns empty string on failure to encode.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $i = (int)$code_point;

    if ($i !== $code_point) {
      // $code_point is not an integer, lets extract the int code point from it
      $i = (int)self::hex_to_int($code_point);

      if (!$i) {
        return '';
      }
    }

    return self::html_entity_decode("&#{$i};", ENT_QUOTES);
  }
3501
3502
  /**
   * Converts hexadecimal U+xxxx code point representation to Integer.
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "U+" or "\u".
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // only real hex digits are allowed: with "[a-z0-9]" an input like "12g4"
    // matched and was silently truncated by "intval()" to 0x12
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
3519
3520
  /**
   * Reverses characters order in the string.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $characters = self::split($str);
    $reversed = array_reverse($characters);

    return implode($reversed);
  }
3531
3532 26
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point,
   *                  or an empty string if the input contains no characters.
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codePoints = self::codepoints($arg);

    // native "max()" must not be called with an empty array (warning / ValueError)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3547
3548
  /**
   * Accepts a string and returns an array of Unicode code points.
   *
   * @param    mixed $arg     A UTF-8 encoded string (split via "UTF8::split()") or an array of characters.
   * @param    bool  $u_style If True, will return code points in U+xxxx format,
   *                          default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $characters = is_string($arg) ? self::split($arg) : $arg;

    $points = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'ord',
        ),
        $characters
    );

    if (!$u_style) {
      return $points;
    }

    // convert the integers into the "U+xxxx" notation
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'int_to_hex',
        ),
        $points
    );
  }
3583
3584
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the lowest code point,
   *                  or an empty string if the input contains no characters.
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codePoints = self::codepoints($arg);

    // native "min()" must not be called with an empty array (warning / ValueError)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3599
3600
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character
   * @param    string $pfix The prefix that is put in front of the hex digits.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $codePoint = self::ord($chr);

    return self::int_to_hex($codePoint, $pfix);
  }
3612 1
3613
  /**
   * Converts Integer to hexadecimal U+xxxx code point representation.
   *
   * The hex digits are left-padded with zeros to a minimum of four digits.
   *
   * @param    int    $int  The integer to be converted to hexadecimal code point.
   * @param    string $pfix The prefix that is put in front of the hex digits.
   *
   * @return   string The code point, or empty string if $int does not consist of digits only.
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    if (!ctype_digit((string)$int)) {
      return '';
    }

    $hex = dechex((int)$int);

    return $pfix . str_pad($hex, 4, '0', STR_PAD_LEFT);
  }
3633
3634
  /**
   * Get a binary representation of a string.
   *
   * Every byte of the input is rendered as eight binary digits.
   *
   * @param   string $str The input character.
   *
   * @return  string The concatenated 8-bit groups, or an empty string for empty input.
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    $out = '';
    $max = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; ++$i) {
      // "$str[$i]" is a single byte, so the native byte-wise "ord()" is the right tool
      // (and it also handles the byte "0" correctly)
      $out .= sprintf('%08b', ord($str[$i]));
    }

    return $out;
  }
3660 4
3661
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split with a capturing pattern, so the words end up at the
   * odd indexes of the split result and the text between them at the even indexes.
   *
   * @param string $s        The input string.
   * @param int    $format   0 = number of words, 1 = list of words,
   *                         2 = words keyed by their character offset in $s.
   * @param string $charlist Additional characters which are treated as part of a word.
   *
   * @return array|float|string The number of words in the string, or the word list for $format 1 / 2.
   */
  public static function str_word_count($s, $format = 0, $charlist = '')
  {
    $wordClass = self::rxClass($charlist, '\pL');
    $parts = preg_split("/({$wordClass}+(?:[\p{Pd}’']{$wordClass}+)*)/u", $s, -1, PREG_SPLIT_DELIM_CAPTURE);
    $total = count($parts);

    if (1 == $format) {
      $words = array();
      for ($i = 1; $i < $total; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if (2 == $format) {
      self::checkForSupport();

      $words = array();
      // the first word starts after the leading non-word text
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $total; $i += 2) {
        $words[$offset] = $parts[$i];
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($total - 1) / 2;
  }
3695
3696
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()". The native function could only be
   * used for pure 7-bit input, but checking for ASCII first would cost more time than it saves.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; without a list, leading and
   *                         trailing characters of the Unicode categories Z (separators)
   *                         and C (control / other) are removed.
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // The char list "0" is falsy in PHP but still a valid list, so it must not select the default.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
3724
3725 1
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * WARNING: This is much slower than the native "rtrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped (whitespace "\s" if omitted)
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if (INF === $chars) {
      $class = '\s';
    } else {
      $class = self::rxClass($chars);
    }

    return preg_replace("/{$class}+$/u", '', $str);
  }
3747
3748
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than the native "ltrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped (whitespace "\s" if omitted)
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if (INF === $chars) {
      $class = '\s';
    } else {
      $class = self::rxClass($chars);
    }

    return preg_replace("/^{$class}+/u", '', $str);
  }
3770 1
3771 1
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, every element is processed on its own (recursively) and
   * $replacement / $start / $length are expanded to arrays of the same size.
   *
   * @param string|array   $str
   * @param string|array   $replacement
   * @param int|array      $start
   * @param null|int|array $length If null (scalar case), everything from $start to the end is replaced.
   *
   * @return array|string
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        // break the reference of the loop variable (the former code unset the wrong variable)
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        // NOTE(review): a length of 0 means "insert" for every element, whereas the scalar
        // path below replaces up to the end of the string when no length is given - confirm intent.
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // scalar $str: only the first replacement of an array is used
    if (is_array($replacement)) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // split both strings into lists of UTF-8 characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // number of characters in $str; avoids a hard dependency on mbstring and its internal encoding
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
3849 1
3850
  /**
   * Alias for "UTF8::to_latin1()".
   *
   * @param string $str The input string.
   *
   * @return string The result of "UTF8::to_latin1()" for the given value.
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
3861
3862
  /**
   * Count the number of sub-string occurrences.
   *
   * $offset and $length are counted in characters: the haystack is cut with
   * "UTF8::substr()" first and the occurrences are then counted in the remaining part.
   *
   * @param    string   $haystack The string to search in.
   * @param    string   $needle   The string to search for.
   * @param    int      $offset   The offset where to start counting.
   * @param    int|null $length   The maximum length after the specified offset to search for the substring,
   *                              null means "up to the end of the string".
   *
   * @return   int number of occurrences of $needle
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $offset = (int)$offset;

    if ($offset !== 0 || $length !== null) {
      // Cut the haystack exactly once. Previously offset / length were applied a second time
      // by the native function and a missing length was turned into 0.
      if ($length === null) {
        $haystack = self::substr($haystack, $offset);
      } else {
        $haystack = self::substr($haystack, $offset, (int)$length);
      }
    }

    // byte-wise counting is safe here: a valid UTF-8 needle can only match on character boundaries
    return substr_count((string)$haystack, (string)$needle);
  }
3888 1
3889 1
  /**
   * Convenience alias of "UTF8::is_ascii()".
   *
   * @param string $str The string to check.
   *
   * @return boolean The result of UTF8::is_ascii() for $str.
   */
  public static function isAscii($str)
  {
    $onlyAscii = self::is_ascii($str);

    return $onlyAscii;
  }
3900
3901
  /**
   * Tells whether a string consists of 7 bit ASCII bytes only.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if no byte in the range 0x80 - 0xFF was found<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    // every byte with the high bit set (0x80 - 0xFF) lies outside of 7 bit ASCII
    $highBitByteFound = preg_match('/[\x80-\xFF]/', $str);

    return !$highBitByteFound;
  }
3913 2
3914 2
  /**
   * Builds an array holding every UTF-8 character between two boundaries.
   *
   * Each boundary is resolved to a code point in this order: a string of decimal
   * digits is used as the number itself, a string of hexadecimal digits is
   * converted via UTF8::hex_to_int(), anything else is passed to UTF8::ord().
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters of the range, or an empty array if a boundary
   *                 is empty or resolves to a falsy code point.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $resolved = array();

    // resolve the start boundary first, then the end boundary
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $resolved[] = $codePoint;
    }

    list($first, $last) = $resolved;

    $toCharacter = array('\\voku\\helper\\UTF8', 'chr');

    return array_map($toCharacter, range($first, $last));
  }
3960
3961 2
  /**
   * Creates a random string of UTF-8 characters.
   *
   * The character pool is built once per request and cached in a static variable:
   * with PCRE UTF-8 support it consists of the numbers and upper / lower case
   * letters found in the code points 48 - 79, otherwise of "0-9", "A-Z" and "a-z".
   *
   * INFO: the characters are picked via mt_rand(), so the result is not suitable
   *       for anything security related (tokens, passwords, ...).
   *
   * @param    int $len The length of string in characters.
   *
   * @return   string String consisting of random characters, an empty string if $len is smaller than 1.
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // a fractional length would never hit exactly 0 in a count-down loop
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (count($chars) === 0) {
      $pool = array();

      if (self::$support['pcre_utf8'] === true) {
        $candidates = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // blank out everything that is neither a number nor an upper / lower case letter
        $candidates = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $candidates);

        // preg_replace() returns null on error
        if (is_array($candidates)) {
          foreach ($candidates as $candidate) {
            // compare against '' explicitly: array_filter() without a callback would also drop "0"
            if ((string)$candidate !== '') {
              $pool[] = $candidate;
            }
          }
        }
      }

      // no PCRE UTF-8 support (or the filtering failed): plain ASCII pool
      if (count($pool) === 0) {
        $pool = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars = $pool;
      $chars_len = count($chars);
    }

    $hash = '';

    for ($i = 0; $i < $len; ++$i) {
      $hash .= $chars[mt_rand(0, $chars_len - 1)];
    }

    return $hash;
  }
4008 1
4009
  /**
   * Convenience alias of "UTF8::chr_map()".
   *
   * @param callable $callback Handed through unchanged to UTF8::chr_map().
   * @param string   $str      Handed through unchanged to UTF8::chr_map().
   *
   * @return array Whatever UTF8::chr_map() returns.
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
4021
4022
  /**
   * Runs a callback on every element UTF8::split() yields for a string.
   *
   * @param    callable $callback The callback function, it receives one split element per call.
   * @param    string   $str      UTF-8 string to run callback on.
   *
   * @return   array The return values of the callback, one per split element.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
4037
4038
  /**
   * Picks the single character at a given position, like $str[1] does for bytes.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of character to return.
   *
   * @return   string The result of UTF8::substr() with a length of 1 at $pos.
   */
  public static function access($str, $pos)
  {
    $character = self::substr($str, $pos, 1);

    return $character;
  }
4052
4053
  /**
   * Orders the characters of a string by their code points.
   *
   * The string is turned into code points via UTF8::codepoints(), sorted and
   * put together again via UTF8::string().
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice leaves every value only once in the array
      $flipped = array_flip($codePoints);
      $codePoints = array_flip($flipped);
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4078
4079 7
  /**
   * Puts a UTF-8 string together from a list of code points.
   *
   * Every element is converted via UTF8::chr() and the results are concatenated
   * in the order of the array.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $toCharacter = array('\\voku\\helper\\UTF8', 'chr');

    $characters = array_map($toCharacter, $array);

    return implode('', $characters);
  }
4098
4099
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The string is passed through UTF8::clean() first and is then handed to the
   * native strip_tags().
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags which should not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are always stripped, this
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8
    $cleaned = self::clean($str);

    $stripped = strip_tags($cleaned, $allowable_tags);

    return $stripped;
  }
4125
4126
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * All lengths are measured via UTF8::strlen(). The input is returned unchanged
   * if $pad_length is not an integer, is not positive, is smaller than the length
   * of the input, or if $pad_string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $input_length) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // nothing to pad with, and it would be a division by zero below
    if (!$ps_length) {
      return $input;
    }

    $diff = $pad_length - $input_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // Odd difference: the right side gets the extra character.
        // The cast has to wrap the whole division, "(int)$diff / 2" is a float for odd values.
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $pre = self::substr($pre, 0, (int)floor($diff / 2));
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $post = self::substr($post, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $input . $post;
  }
4171
4172
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is handed to the
   * native str_repeat().
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    $filtered = self::filter($input);

    $repeated = str_repeat($filtered, $multiplier);

    return $repeated;
  }
4196
4197
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly consecutive repetitions of a search string is
   * collapsed into a single occurrence, e.g. "a   b" with " " becomes "a b".
   *
   * @param    string       $str  The base string
   * @param    string|array $what String to search for in the base string, or an array of such strings
   *                              (processed one after another); any other type leaves $str untouched
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        // "$1" is the captured $item itself. Using $item as replacement directly would
        // let preg_replace() interpret sequences like "$0" or "\0" in it as back-references.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4219
4220
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string   $haystack  <p>
   *                            The string from which to get the position of the first occurrence
   *                            of needle
   *                            </p>
   * @param string   $needle    <p>
   *                            The string to find in haystack
   *                            </p>
   * @param int|null $offset    [optional] <p>
   *                            The position in haystack
   *                            to start searching, null is treated as 0
   *                            </p>
   * @param string   $encoding  The character encoding handed to mb_stripos(); a boolean
   *                            (old signature) is replaced by "UTF-8"
   * @param boolean  $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false Return the numeric position of the first occurrence of
   *                   needle in the haystack string, or false if needle is not found
   *                   or if haystack / needle is an empty string.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    // mb_stripos() expects an integer offset, handing over the default null is deprecated as of PHP 8.1
    return mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4267
4268
  /**
   * Repairs a double (or multiple) encoded UTF8 string.
   *
   * The value is run through "UTF8::to_utf8(UTF8::utf8_decode())" over and over
   * again until a pass does not change it anymore. Arrays are processed element
   * by element (recursively), their keys are kept.
   *
   * @param array|string $str
   *
   * @return array|string The repaired string, or the array of repaired elements if an array was given.
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      $fixed = array();

      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    $previous = '';

    // stop as soon as another decode / encode round trip changes nothing
    while ($previous != $str) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($str));
    }

    return $str;
  }
4295
4296
  /**
   * Convenience alias of "UTF8::ucfirst()".
   *
   * INFO: despite its name only UTF8::ucfirst() is called, so this is not
   *       a per-word operation.
   *
   * @param string $str Handed through unchanged to UTF8::ucfirst().
   *
   * @return string Whatever UTF8::ucfirst() returns for $str.
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
4307
4308
  /**
   * Turns the first character of a string into upper case.
   *
   * The first character is cut off via UTF8::substr(), converted via
   * UTF8::strtoupper() and put in front of the untouched rest of the string.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $firstCharUpper = self::strtoupper(self::substr($str, 0, 1));
    $remainder = self::substr($str, 1);

    return $firstCharUpper . $remainder;
  }
4319
4320
  /**
   * Make a string uppercase.
   *
   * mb_strtoupper() is used if the "mbstring" support flag is set. Otherwise the
   * string is passed through UTF8::clean() and converted by replacing the keys of
   * UTF8::case_table() with their values.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding The character encoding, only used by the mbstring code path.
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // lookup lists for the fallback, built once and kept between calls
    static $lowerChars = null;
    static $upperChars = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return mb_strtoupper($str, $encoding);
    }

    // fallback

    if ($lowerChars === null) {
      $table = self::case_table();
      $lowerChars = array_keys($table);
      $upperChars = array_values($table);
    }

    return str_replace($lowerChars, $upperChars, self::clean($str));
  }
4363
4364
  /**
4365
   * Returns an array of all lower and upper case UTF-8 encoded characters.
4366
   *
4367
   * @return   string An array with lower case chars as keys and upper chars as values.
4368
   */
4369
  protected static function case_table()
4370
  {
4371
    static $case = array(
4372
4373
      // lower => upper
4374
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
4375
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
4376
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
4377
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
4378
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
4379
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
4380
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
4381
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
4382
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
4383
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
4384
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
4385
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
4386
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
4387
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
4388
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
4389
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
4390
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
4391
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
4392
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
4393
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
4394
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
4395
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
4396
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
4397
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
4398
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
4399
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
4400
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
4401
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
4402
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
4403
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
4404
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
4405
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
4406
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
4407
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
4408
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
4409
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
4410
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
4411
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
4412
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
4413
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
4414
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
4415
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
4416
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
4417
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
4418
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
4419
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
4420
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
4421
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
4422
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
4423
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
4424
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
4425
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
4426
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
4427
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
4428
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
4429
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
4430
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
4431
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
4432
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
4433
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
4434
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
4435
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
4436
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
4437
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
4438
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
4439
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
4440
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
4441
      "\xea\x9e\x87"     => "\xea\x9e\x86",
4442
      "\xea\x9e\x85"     => "\xea\x9e\x84",
4443
      "\xea\x9e\x83"     => "\xea\x9e\x82",
4444
      "\xea\x9e\x81"     => "\xea\x9e\x80",
4445
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
4446
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
4447
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
4448
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
4449
      "\xea\x9d\xad"     => "\xea\x9d\xac",
4450
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
4451
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
4452
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
4453
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
4454
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
4455
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
4456
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
4457
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
4458
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
4459
      "\xea\x9d\x99"     => "\xea\x9d\x98",
4460
      "\xea\x9d\x97"     => "\xea\x9d\x96",
4461
      "\xea\x9d\x95"     => "\xea\x9d\x94",
4462
      "\xea\x9d\x93"     => "\xea\x9d\x92",
4463
      "\xea\x9d\x91"     => "\xea\x9d\x90",
4464
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
4465
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
4466
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
4467
      "\xea\x9d\x89"     => "\xea\x9d\x88",
4468
      "\xea\x9d\x87"     => "\xea\x9d\x86",
4469
      "\xea\x9d\x85"     => "\xea\x9d\x84",
4470
      "\xea\x9d\x83"     => "\xea\x9d\x82",
4471
      "\xea\x9d\x81"     => "\xea\x9d\x80",
4472
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
4473
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
4474
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
4475
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
4476
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
4477
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
4478
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
4479
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
4480
      "\xea\x9c\xad"     => "\xea\x9c\xac",
4481
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
4482
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
4483
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
4484
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
4485
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
4486
      "\xea\x9a\x97"     => "\xea\x9a\x96",
4487
      "\xea\x9a\x95"     => "\xea\x9a\x94",
4488
      "\xea\x9a\x93"     => "\xea\x9a\x92",
4489
      "\xea\x9a\x91"     => "\xea\x9a\x90",
4490
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
4491
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
4492
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
4493
      "\xea\x9a\x89"     => "\xea\x9a\x88",
4494
      "\xea\x9a\x87"     => "\xea\x9a\x86",
4495
      "\xea\x9a\x85"     => "\xea\x9a\x84",
4496
      "\xea\x9a\x83"     => "\xea\x9a\x82",
4497
      "\xea\x9a\x81"     => "\xea\x9a\x80",
4498
      "\xea\x99\xad"     => "\xea\x99\xac",
4499
      "\xea\x99\xab"     => "\xea\x99\xaa",
4500
      "\xea\x99\xa9"     => "\xea\x99\xa8",
4501
      "\xea\x99\xa7"     => "\xea\x99\xa6",
4502
      "\xea\x99\xa5"     => "\xea\x99\xa4",
4503
      "\xea\x99\xa3"     => "\xea\x99\xa2",
4504
      "\xea\x99\x9f"     => "\xea\x99\x9e",
4505
      "\xea\x99\x9d"     => "\xea\x99\x9c",
4506
      "\xea\x99\x9b"     => "\xea\x99\x9a",
4507
      "\xea\x99\x99"     => "\xea\x99\x98",
4508
      "\xea\x99\x97"     => "\xea\x99\x96",
4509
      "\xea\x99\x95"     => "\xea\x99\x94",
4510
      "\xea\x99\x93"     => "\xea\x99\x92",
4511
      "\xea\x99\x91"     => "\xea\x99\x90",
4512
      "\xea\x99\x8f"     => "\xea\x99\x8e",
4513
      "\xea\x99\x8d"     => "\xea\x99\x8c",
4514
      "\xea\x99\x8b"     => "\xea\x99\x8a",
4515
      "\xea\x99\x89"     => "\xea\x99\x88",
4516
      "\xea\x99\x87"     => "\xea\x99\x86",
4517
      "\xea\x99\x85"     => "\xea\x99\x84",
4518
      "\xea\x99\x83"     => "\xea\x99\x82",
4519
      "\xea\x99\x81"     => "\xea\x99\x80",
4520
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
4521
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
4522
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
4523
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
4524
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
4525
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
4526
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
4527
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
4528
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
4529
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
4530
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
4531
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
4532
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
4533
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
4534
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
4535
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
4536
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
4537
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
4538
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
4539
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
4540
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
4541
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
4542
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
4543
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
4544
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
4545
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
4546
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
4547
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
4548
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
4549
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
4550
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
4551
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
4552
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
4553
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
4554
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
4555
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
4556
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
4557
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
4558
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
4559
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
4560
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
4561
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
4562
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
4563
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
4564
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
4565
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
4566
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
4567
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
4568
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
4569
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
4570
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
4571
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
4572
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
4573
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
4574
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
4575
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
4576
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
4577
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
4578
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
4579
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
4580
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
4581
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
4582
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
4583
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
4584
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
4585
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
4586
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
4587
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
4588
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
4589
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
4590
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
4591
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
4592
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
4593
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
4594
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
4595
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
4596
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
4597
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
4598
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
4599
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
4600
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
4601
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
4602
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
4603
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
4604
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
4605
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
4606
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
4607
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
4608
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
4609
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
4610
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
4611
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
4612
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
4613
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
4614
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
4615
      "\xe2\xb1\xa6"     => "\xc8\xbe",
4616
      "\xe2\xb1\xa5"     => "\xc8\xba",
4617
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
4618
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
4619
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
4620
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
4621
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
4622
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
4623
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
4624
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
4625
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
4626
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
4627
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
4628
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
4629
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
4630
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
4631
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
4632
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
4633
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
4634
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
4635
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
4636
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
4637
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
4638
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
4639
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
4640
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
4641
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
4642
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
4643
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
4644
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
4645
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
4646
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
4647
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
4648
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
4649
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
4650
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
4651
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
4652
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
4653
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
4654
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
4655
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
4656
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
4657
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
4658
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
4659
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
4660
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
4661
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
4662
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
4663
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
4664
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
4665
      "\xe2\x86\x84"     => "\xe2\x86\x83",
4666
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
4667
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
4668
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
4669
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
4670
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
4671
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
4672
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
4673
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
4674
      "\xe1\xbe\xbe"     => "\xce\x99",
4675
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
4676
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
4677
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
4678
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
4679
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
4680
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
4681
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
4682
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
4683
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
4684
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
4685
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
4686
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
4687
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
4688
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
4689
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
4690
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
4691
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
4692
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
4693
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
4694
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
4695
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
4696
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
4697
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
4698
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
4699
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
4700
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
4701
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
4702
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
4703
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
4704
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
4705
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
4706
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
4707
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
4708
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
4709
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
4710
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
4711
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
4712
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
4713
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
4714
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
4715
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
4716
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
4717
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
4718
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
4719
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
4720
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
4721
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
4722
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
4723
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
4724
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
4725
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
4726
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
4727
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
4728
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
4729
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
4730
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
4731
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
4732
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
4733
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
4734
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
4735
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
4736
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
4737
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
4738
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
4739
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
4740
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
4741
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
4742
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
4743
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
4744
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
4745
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
4746
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
4747
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
4748
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
4749
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
4750
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
4751
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
4752
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
4753
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
4754
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
4755
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
4756
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
4757
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
4758
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
4759
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
4760
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
4761
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
4762
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
4763
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
4764
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
4765
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
4766
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
4767
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
4768
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
4769
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
4770
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
4771
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
4772
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
4773
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
4774
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
4775
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
4776
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
4777
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
4778
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
4779
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
4780
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
4781
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
4782
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
4783
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
4784
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
4785
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
4786
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
4787
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
4788
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
4789
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
4790
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
4791
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
4792
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
4793
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
4794
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
4795
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
4796
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
4797
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
4798
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
4799
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
4800
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
4801
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
4802
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
4803
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
4804
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
4805
      "\xe1\xba\xad"     => "\xe1\xba\xac",
4806
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
4807
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
4808
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
4809
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
4810
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
4811
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
4812
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
4813
      "\xe1\xba\x95"     => "\xe1\xba\x94",
4814
      "\xe1\xba\x93"     => "\xe1\xba\x92",
4815
      "\xe1\xba\x91"     => "\xe1\xba\x90",
4816
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
4817
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
4818
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
4819
      "\xe1\xba\x89"     => "\xe1\xba\x88",
4820
      "\xe1\xba\x87"     => "\xe1\xba\x86",
4821
      "\xe1\xba\x85"     => "\xe1\xba\x84",
4822
      "\xe1\xba\x83"     => "\xe1\xba\x82",
4823
      "\xe1\xba\x81"     => "\xe1\xba\x80",
4824
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
4825
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
4826
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
4827
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
4828
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
4829
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
4830
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
4831
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
4832
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
4833
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
4834
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
4835
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
4836
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
4837
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
4838
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
4839
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
4840
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
4841
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
4842
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
4843
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
4844
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
4845
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
4846
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
4847
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
4848
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
4849
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
4850
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
4851
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
4852
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
4853
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
4854
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
4855
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
4856
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
4857
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
4858
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
4859
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
4860
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
4861
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
4862
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
4863
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
4864
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
4865
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
4866
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
4867
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
4868
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
4869
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
4870
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
4871
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
4872
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
4873
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
4874
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
4875
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
4876
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
4877
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
4878
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
4879
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
4880
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
4881
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
4882
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
4883
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
4884
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
4885
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
4886
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
4887
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
4888
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
4889
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
4890
      "\xd6\x86"         => "\xd5\x96",
4891
      "\xd6\x85"         => "\xd5\x95",
4892
      "\xd6\x84"         => "\xd5\x94",
4893
      "\xd6\x83"         => "\xd5\x93",
4894
      "\xd6\x82"         => "\xd5\x92",
4895
      "\xd6\x81"         => "\xd5\x91",
4896
      "\xd6\x80"         => "\xd5\x90",
4897
      "\xd5\xbf"         => "\xd5\x8f",
4898
      "\xd5\xbe"         => "\xd5\x8e",
4899
      "\xd5\xbd"         => "\xd5\x8d",
4900
      "\xd5\xbc"         => "\xd5\x8c",
4901
      "\xd5\xbb"         => "\xd5\x8b",
4902
      "\xd5\xba"         => "\xd5\x8a",
4903
      "\xd5\xb9"         => "\xd5\x89",
4904
      "\xd5\xb8"         => "\xd5\x88",
4905
      "\xd5\xb7"         => "\xd5\x87",
4906
      "\xd5\xb6"         => "\xd5\x86",
4907
      "\xd5\xb5"         => "\xd5\x85",
4908
      "\xd5\xb4"         => "\xd5\x84",
4909
      "\xd5\xb3"         => "\xd5\x83",
4910
      "\xd5\xb2"         => "\xd5\x82",
4911
      "\xd5\xb1"         => "\xd5\x81",
4912
      "\xd5\xb0"         => "\xd5\x80",
4913
      "\xd5\xaf"         => "\xd4\xbf",
4914
      "\xd5\xae"         => "\xd4\xbe",
4915
      "\xd5\xad"         => "\xd4\xbd",
4916
      "\xd5\xac"         => "\xd4\xbc",
4917
      "\xd5\xab"         => "\xd4\xbb",
4918
      "\xd5\xaa"         => "\xd4\xba",
4919
      "\xd5\xa9"         => "\xd4\xb9",
4920
      "\xd5\xa8"         => "\xd4\xb8",
4921
      "\xd5\xa7"         => "\xd4\xb7",
4922
      "\xd5\xa6"         => "\xd4\xb6",
4923
      "\xd5\xa5"         => "\xd4\xb5",
4924
      "\xd5\xa4"         => "\xd4\xb4",
4925
      "\xd5\xa3"         => "\xd4\xb3",
4926
      "\xd5\xa2"         => "\xd4\xb2",
4927
      "\xd5\xa1"         => "\xd4\xb1",
4928
      "\xd4\xa5"         => "\xd4\xa4",
4929
      "\xd4\xa3"         => "\xd4\xa2",
4930
      "\xd4\xa1"         => "\xd4\xa0",
4931
      "\xd4\x9f"         => "\xd4\x9e",
4932
      "\xd4\x9d"         => "\xd4\x9c",
4933
      "\xd4\x9b"         => "\xd4\x9a",
4934
      "\xd4\x99"         => "\xd4\x98",
4935
      "\xd4\x97"         => "\xd4\x96",
4936
      "\xd4\x95"         => "\xd4\x94",
4937
      "\xd4\x93"         => "\xd4\x92",
4938
      "\xd4\x91"         => "\xd4\x90",
4939
      "\xd4\x8f"         => "\xd4\x8e",
4940
      "\xd4\x8d"         => "\xd4\x8c",
4941
      "\xd4\x8b"         => "\xd4\x8a",
4942
      "\xd4\x89"         => "\xd4\x88",
4943
      "\xd4\x87"         => "\xd4\x86",
4944
      "\xd4\x85"         => "\xd4\x84",
4945
      "\xd4\x83"         => "\xd4\x82",
4946
      "\xd4\x81"         => "\xd4\x80",
4947
      "\xd3\xbf"         => "\xd3\xbe",
4948
      "\xd3\xbd"         => "\xd3\xbc",
4949
      "\xd3\xbb"         => "\xd3\xba",
4950
      "\xd3\xb9"         => "\xd3\xb8",
4951
      "\xd3\xb7"         => "\xd3\xb6",
4952
      "\xd3\xb5"         => "\xd3\xb4",
4953
      "\xd3\xb3"         => "\xd3\xb2",
4954
      "\xd3\xb1"         => "\xd3\xb0",
4955
      "\xd3\xaf"         => "\xd3\xae",
4956
      "\xd3\xad"         => "\xd3\xac",
4957
      "\xd3\xab"         => "\xd3\xaa",
4958
      "\xd3\xa9"         => "\xd3\xa8",
4959
      "\xd3\xa7"         => "\xd3\xa6",
4960
      "\xd3\xa5"         => "\xd3\xa4",
4961
      "\xd3\xa3"         => "\xd3\xa2",
4962
      "\xd3\xa1"         => "\xd3\xa0",
4963
      "\xd3\x9f"         => "\xd3\x9e",
4964
      "\xd3\x9d"         => "\xd3\x9c",
4965
      "\xd3\x9b"         => "\xd3\x9a",
4966
      "\xd3\x99"         => "\xd3\x98",
4967
      "\xd3\x97"         => "\xd3\x96",
4968
      "\xd3\x95"         => "\xd3\x94",
4969
      "\xd3\x93"         => "\xd3\x92",
4970
      "\xd3\x91"         => "\xd3\x90",
4971
      "\xd3\x8f"         => "\xd3\x80",
4972
      "\xd3\x8e"         => "\xd3\x8d",
4973
      "\xd3\x8c"         => "\xd3\x8b",
4974
      "\xd3\x8a"         => "\xd3\x89",
4975
      "\xd3\x88"         => "\xd3\x87",
4976
      "\xd3\x86"         => "\xd3\x85",
4977
      "\xd3\x84"         => "\xd3\x83",
4978
      "\xd3\x82"         => "\xd3\x81",
4979
      "\xd2\xbf"         => "\xd2\xbe",
4980
      "\xd2\xbd"         => "\xd2\xbc",
4981
      "\xd2\xbb"         => "\xd2\xba",
4982
      "\xd2\xb9"         => "\xd2\xb8",
4983
      "\xd2\xb7"         => "\xd2\xb6",
4984
      "\xd2\xb5"         => "\xd2\xb4",
4985
      "\xd2\xb3"         => "\xd2\xb2",
4986
      "\xd2\xb1"         => "\xd2\xb0",
4987
      "\xd2\xaf"         => "\xd2\xae",
4988
      "\xd2\xad"         => "\xd2\xac",
4989
      "\xd2\xab"         => "\xd2\xaa",
4990
      "\xd2\xa9"         => "\xd2\xa8",
4991
      "\xd2\xa7"         => "\xd2\xa6",
4992
      "\xd2\xa5"         => "\xd2\xa4",
4993
      "\xd2\xa3"         => "\xd2\xa2",
4994
      "\xd2\xa1"         => "\xd2\xa0",
4995
      "\xd2\x9f"         => "\xd2\x9e",
4996
      "\xd2\x9d"         => "\xd2\x9c",
4997
      "\xd2\x9b"         => "\xd2\x9a",
4998
      "\xd2\x99"         => "\xd2\x98",
4999
      "\xd2\x97"         => "\xd2\x96",
5000
      "\xd2\x95"         => "\xd2\x94",
5001
      "\xd2\x93"         => "\xd2\x92",
5002
      "\xd2\x91"         => "\xd2\x90",
5003
      "\xd2\x8f"         => "\xd2\x8e",
5004
      "\xd2\x8d"         => "\xd2\x8c",
5005
      "\xd2\x8b"         => "\xd2\x8a",
5006
      "\xd2\x81"         => "\xd2\x80",
5007
      "\xd1\xbf"         => "\xd1\xbe",
5008
      "\xd1\xbd"         => "\xd1\xbc",
5009
      "\xd1\xbb"         => "\xd1\xba",
5010
      "\xd1\xb9"         => "\xd1\xb8",
5011
      "\xd1\xb7"         => "\xd1\xb6",
5012
      "\xd1\xb5"         => "\xd1\xb4",
5013
      "\xd1\xb3"         => "\xd1\xb2",
5014
      "\xd1\xb1"         => "\xd1\xb0",
5015
      "\xd1\xaf"         => "\xd1\xae",
5016
      "\xd1\xad"         => "\xd1\xac",
5017
      "\xd1\xab"         => "\xd1\xaa",
5018
      "\xd1\xa9"         => "\xd1\xa8",
5019
      "\xd1\xa7"         => "\xd1\xa6",
5020
      "\xd1\xa5"         => "\xd1\xa4",
5021
      "\xd1\xa3"         => "\xd1\xa2",
5022
      "\xd1\xa1"         => "\xd1\xa0",
5023
      "\xd1\x9f"         => "\xd0\x8f",
5024
      "\xd1\x9e"         => "\xd0\x8e",
5025
      "\xd1\x9d"         => "\xd0\x8d",
5026
      "\xd1\x9c"         => "\xd0\x8c",
5027
      "\xd1\x9b"         => "\xd0\x8b",
5028
      "\xd1\x9a"         => "\xd0\x8a",
5029
      "\xd1\x99"         => "\xd0\x89",
5030
      "\xd1\x98"         => "\xd0\x88",
5031
      "\xd1\x97"         => "\xd0\x87",
5032
      "\xd1\x96"         => "\xd0\x86",
5033
      "\xd1\x95"         => "\xd0\x85",
5034
      "\xd1\x94"         => "\xd0\x84",
5035
      "\xd1\x93"         => "\xd0\x83",
5036
      "\xd1\x92"         => "\xd0\x82",
5037
      "\xd1\x91"         => "\xd0\x81",
5038
      "\xd1\x90"         => "\xd0\x80",
5039
      "\xd1\x8f"         => "\xd0\xaf",
5040
      "\xd1\x8e"         => "\xd0\xae",
5041
      "\xd1\x8d"         => "\xd0\xad",
5042
      "\xd1\x8c"         => "\xd0\xac",
5043
      "\xd1\x8b"         => "\xd0\xab",
5044
      "\xd1\x8a"         => "\xd0\xaa",
5045
      "\xd1\x89"         => "\xd0\xa9",
5046
      "\xd1\x88"         => "\xd0\xa8",
5047
      "\xd1\x87"         => "\xd0\xa7",
5048
      "\xd1\x86"         => "\xd0\xa6",
5049
      "\xd1\x85"         => "\xd0\xa5",
5050
      "\xd1\x84"         => "\xd0\xa4",
5051
      "\xd1\x83"         => "\xd0\xa3",
5052
      "\xd1\x82"         => "\xd0\xa2",
5053
      "\xd1\x81"         => "\xd0\xa1",
5054
      "\xd1\x80"         => "\xd0\xa0",
5055
      "\xd0\xbf"         => "\xd0\x9f",
5056
      "\xd0\xbe"         => "\xd0\x9e",
5057
      "\xd0\xbd"         => "\xd0\x9d",
5058
      "\xd0\xbc"         => "\xd0\x9c",
5059
      "\xd0\xbb"         => "\xd0\x9b",
5060
      "\xd0\xba"         => "\xd0\x9a",
5061
      "\xd0\xb9"         => "\xd0\x99",
5062
      "\xd0\xb8"         => "\xd0\x98",
5063
      "\xd0\xb7"         => "\xd0\x97",
5064
      "\xd0\xb6"         => "\xd0\x96",
5065
      "\xd0\xb5"         => "\xd0\x95",
5066
      "\xd0\xb4"         => "\xd0\x94",
5067
      "\xd0\xb3"         => "\xd0\x93",
5068
      "\xd0\xb2"         => "\xd0\x92",
5069
      "\xd0\xb1"         => "\xd0\x91",
5070
      "\xd0\xb0"         => "\xd0\x90",
5071
      "\xcf\xbb"         => "\xcf\xba",
5072
      "\xcf\xb8"         => "\xcf\xb7",
5073
      "\xcf\xb5"         => "\xce\x95",
5074
      "\xcf\xb2"         => "\xcf\xb9",
5075
      "\xcf\xb1"         => "\xce\xa1",
5076
      "\xcf\xb0"         => "\xce\x9a",
5077
      "\xcf\xaf"         => "\xcf\xae",
5078
      "\xcf\xad"         => "\xcf\xac",
5079
      "\xcf\xab"         => "\xcf\xaa",
5080
      "\xcf\xa9"         => "\xcf\xa8",
5081
      "\xcf\xa7"         => "\xcf\xa6",
5082
      "\xcf\xa5"         => "\xcf\xa4",
5083
      "\xcf\xa3"         => "\xcf\xa2",
5084
      "\xcf\xa1"         => "\xcf\xa0",
5085
      "\xcf\x9f"         => "\xcf\x9e",
5086
      "\xcf\x9d"         => "\xcf\x9c",
5087
      "\xcf\x9b"         => "\xcf\x9a",
5088
      "\xcf\x99"         => "\xcf\x98",
5089
      "\xcf\x97"         => "\xcf\x8f",
5090
      "\xcf\x96"         => "\xce\xa0",
5091
      "\xcf\x95"         => "\xce\xa6",
5092
      "\xcf\x91"         => "\xce\x98",
5093
      "\xcf\x90"         => "\xce\x92",
5094
      "\xcf\x8e"         => "\xce\x8f",
5095
      "\xcf\x8d"         => "\xce\x8e",
5096
      "\xcf\x8c"         => "\xce\x8c",
5097
      "\xcf\x8b"         => "\xce\xab",
5098
      "\xcf\x8a"         => "\xce\xaa",
5099
      "\xcf\x89"         => "\xce\xa9",
5100
      "\xcf\x88"         => "\xce\xa8",
5101
      "\xcf\x87"         => "\xce\xa7",
5102
      "\xcf\x86"         => "\xce\xa6",
5103
      "\xcf\x85"         => "\xce\xa5",
5104
      "\xcf\x84"         => "\xce\xa4",
5105
      "\xcf\x83"         => "\xce\xa3",
5106
      "\xcf\x82"         => "\xce\xa3",
5107
      "\xcf\x81"         => "\xce\xa1",
5108
      "\xcf\x80"         => "\xce\xa0",
5109
      "\xce\xbf"         => "\xce\x9f",
5110
      "\xce\xbe"         => "\xce\x9e",
5111
      "\xce\xbd"         => "\xce\x9d",
5112
      "\xce\xbc"         => "\xce\x9c",
5113
      "\xce\xbb"         => "\xce\x9b",
5114
      "\xce\xba"         => "\xce\x9a",
5115
      "\xce\xb9"         => "\xce\x99",
5116
      "\xce\xb8"         => "\xce\x98",
5117
      "\xce\xb7"         => "\xce\x97",
5118
      "\xce\xb6"         => "\xce\x96",
5119
      "\xce\xb5"         => "\xce\x95",
5120
      "\xce\xb4"         => "\xce\x94",
5121
      "\xce\xb3"         => "\xce\x93",
5122
      "\xce\xb2"         => "\xce\x92",
5123
      "\xce\xb1"         => "\xce\x91",
5124
      "\xce\xaf"         => "\xce\x8a",
5125
      "\xce\xae"         => "\xce\x89",
5126
      "\xce\xad"         => "\xce\x88",
5127
      "\xce\xac"         => "\xce\x86",
5128
      "\xcd\xbd"         => "\xcf\xbf",
5129
      "\xcd\xbc"         => "\xcf\xbe",
5130
      "\xcd\xbb"         => "\xcf\xbd",
5131
      "\xcd\xb7"         => "\xcd\xb6",
5132
      "\xcd\xb3"         => "\xcd\xb2",
5133
      "\xcd\xb1"         => "\xcd\xb0",
5134
      "\xca\x92"         => "\xc6\xb7",
5135
      "\xca\x8c"         => "\xc9\x85",
5136
      "\xca\x8b"         => "\xc6\xb2",
5137
      "\xca\x8a"         => "\xc6\xb1",
5138
      "\xca\x89"         => "\xc9\x84",
5139
      "\xca\x88"         => "\xc6\xae",
5140
      "\xca\x83"         => "\xc6\xa9",
5141
      "\xca\x80"         => "\xc6\xa6",
5142
      "\xc9\xbd"         => "\xe2\xb1\xa4",
5143
      "\xc9\xb5"         => "\xc6\x9f",
5144
      "\xc9\xb2"         => "\xc6\x9d",
5145
      "\xc9\xb1"         => "\xe2\xb1\xae",
5146
      "\xc9\xaf"         => "\xc6\x9c",
5147
      "\xc9\xab"         => "\xe2\xb1\xa2",
5148
      "\xc9\xa9"         => "\xc6\x96",
5149
      "\xc9\xa8"         => "\xc6\x97",
5150
      "\xc9\xa5"         => "\xea\x9e\x8d",
5151
      "\xc9\xa3"         => "\xc6\x94",
5152
      "\xc9\xa0"         => "\xc6\x93",
5153
      "\xc9\x9b"         => "\xc6\x90",
5154
      "\xc9\x99"         => "\xc6\x8f",
5155
      "\xc9\x97"         => "\xc6\x8a",
5156
      "\xc9\x96"         => "\xc6\x89",
5157
      "\xc9\x94"         => "\xc6\x86",
5158
      "\xc9\x93"         => "\xc6\x81",
5159
      "\xc9\x92"         => "\xe2\xb1\xb0",
5160
      "\xc9\x91"         => "\xe2\xb1\xad",
5161
      "\xc9\x90"         => "\xe2\xb1\xaf",
5162
      "\xc9\x8f"         => "\xc9\x8e",
5163
      "\xc9\x8d"         => "\xc9\x8c",
5164
      "\xc9\x8b"         => "\xc9\x8a",
5165
      "\xc9\x89"         => "\xc9\x88",
5166
      "\xc9\x87"         => "\xc9\x86",
5167
      "\xc9\x82"         => "\xc9\x81",
5168
      "\xc9\x80"         => "\xe2\xb1\xbf",
5169
      "\xc8\xbf"         => "\xe2\xb1\xbe",
5170
      "\xc8\xbc"         => "\xc8\xbb",
5171
      "\xc8\xb3"         => "\xc8\xb2",
5172
      "\xc8\xb1"         => "\xc8\xb0",
5173
      "\xc8\xaf"         => "\xc8\xae",
5174
      "\xc8\xad"         => "\xc8\xac",
5175
      "\xc8\xab"         => "\xc8\xaa",
5176
      "\xc8\xa9"         => "\xc8\xa8",
5177
      "\xc8\xa7"         => "\xc8\xa6",
5178
      "\xc8\xa5"         => "\xc8\xa4",
5179
      "\xc8\xa3"         => "\xc8\xa2",
5180
      "\xc8\x9f"         => "\xc8\x9e",
5181
      "\xc8\x9d"         => "\xc8\x9c",
5182
      "\xc8\x9b"         => "\xc8\x9a",
5183
      "\xc8\x99"         => "\xc8\x98",
5184
      "\xc8\x97"         => "\xc8\x96",
5185
      "\xc8\x95"         => "\xc8\x94",
5186
      "\xc8\x93"         => "\xc8\x92",
5187
      "\xc8\x91"         => "\xc8\x90",
5188
      "\xc8\x8f"         => "\xc8\x8e",
5189
      "\xc8\x8d"         => "\xc8\x8c",
5190
      "\xc8\x8b"         => "\xc8\x8a",
5191
      "\xc8\x89"         => "\xc8\x88",
5192
      "\xc8\x87"         => "\xc8\x86",
5193
      "\xc8\x85"         => "\xc8\x84",
5194
      "\xc8\x83"         => "\xc8\x82",
5195
      "\xc8\x81"         => "\xc8\x80",
5196
      "\xc7\xbf"         => "\xc7\xbe",
5197
      "\xc7\xbd"         => "\xc7\xbc",
5198 1
      "\xc7\xbb"         => "\xc7\xba",
5199
      "\xc7\xb9"         => "\xc7\xb8",
5200 1
      "\xc7\xb5"         => "\xc7\xb4",
5201
      "\xc7\xb3"         => "\xc7\xb2",
5202
      "\xc7\xaf"         => "\xc7\xae",
5203
      "\xc7\xad"         => "\xc7\xac",
5204
      "\xc7\xab"         => "\xc7\xaa",
5205
      "\xc7\xa9"         => "\xc7\xa8",
5206
      "\xc7\xa7"         => "\xc7\xa6",
5207
      "\xc7\xa5"         => "\xc7\xa4",
5208
      "\xc7\xa3"         => "\xc7\xa2",
5209
      "\xc7\xa1"         => "\xc7\xa0",
5210
      "\xc7\x9f"         => "\xc7\x9e",
5211
      "\xc7\x9d"         => "\xc6\x8e",
5212
      "\xc7\x9c"         => "\xc7\x9b",
5213
      "\xc7\x9a"         => "\xc7\x99",
5214
      "\xc7\x98"         => "\xc7\x97",
5215 1
      "\xc7\x96"         => "\xc7\x95",
5216
      "\xc7\x94"         => "\xc7\x93",
5217
      "\xc7\x92"         => "\xc7\x91",
5218
      "\xc7\x90"         => "\xc7\x8f",
5219
      "\xc7\x8e"         => "\xc7\x8d",
5220
      "\xc7\x8c"         => "\xc7\x8b",
5221
      "\xc7\x89"         => "\xc7\x88",
5222
      "\xc7\x86"         => "\xc7\x85",
5223
      "\xc6\xbf"         => "\xc7\xb7",
5224
      "\xc6\xbd"         => "\xc6\xbc",
5225
      "\xc6\xb9"         => "\xc6\xb8",
5226
      "\xc6\xb6"         => "\xc6\xb5",
5227
      "\xc6\xb4"         => "\xc6\xb3",
5228
      "\xc6\xb0"         => "\xc6\xaf",
5229
      "\xc6\xad"         => "\xc6\xac",
5230
      "\xc6\xa8"         => "\xc6\xa7",
5231 1
      "\xc6\xa5"         => "\xc6\xa4",
5232
      "\xc6\xa3"         => "\xc6\xa2",
5233 1
      "\xc6\xa1"         => "\xc6\xa0",
5234 1
      "\xc6\x9e"         => "\xc8\xa0",
5235
      "\xc6\x9a"         => "\xc8\xbd",
5236 1
      "\xc6\x99"         => "\xc6\x98",
5237
      "\xc6\x95"         => "\xc7\xb6",
5238
      "\xc6\x92"         => "\xc6\x91",
5239
      "\xc6\x8c"         => "\xc6\x8b",
5240
      "\xc6\x88"         => "\xc6\x87",
5241
      "\xc6\x85"         => "\xc6\x84",
5242
      "\xc6\x83"         => "\xc6\x82",
5243
      "\xc6\x80"         => "\xc9\x83",
5244
      "\xc5\xbf"         => "\x53",
5245
      "\xc5\xbe"         => "\xc5\xbd",
5246
      "\xc5\xbc"         => "\xc5\xbb",
5247 8
      "\xc5\xba"         => "\xc5\xb9",
5248
      "\xc5\xb7"         => "\xc5\xb6",
5249 8
      "\xc5\xb5"         => "\xc5\xb4",
5250
      "\xc5\xb3"         => "\xc5\xb2",
5251
      "\xc5\xb1"         => "\xc5\xb0",
5252
      "\xc5\xaf"         => "\xc5\xae",
5253
      "\xc5\xad"         => "\xc5\xac",
5254
      "\xc5\xab"         => "\xc5\xaa",
5255
      "\xc5\xa9"         => "\xc5\xa8",
5256
      "\xc5\xa7"         => "\xc5\xa6",
5257
      "\xc5\xa5"         => "\xc5\xa4",
5258
      "\xc5\xa3"         => "\xc5\xa2",
5259
      "\xc5\xa1"         => "\xc5\xa0",
5260 7
      "\xc5\x9f"         => "\xc5\x9e",
5261
      "\xc5\x9d"         => "\xc5\x9c",
5262 7
      "\xc5\x9b"         => "\xc5\x9a",
5263 2
      "\xc5\x99"         => "\xc5\x98",
5264
      "\xc5\x97"         => "\xc5\x96",
5265
      "\xc5\x95"         => "\xc5\x94",
5266
      "\xc5\x93"         => "\xc5\x92",
5267 6
      "\xc5\x91"         => "\xc5\x90",
5268 6
      "\xc5\x8f"         => "\xc5\x8e",
5269
      "\xc5\x8d"         => "\xc5\x8c",
5270 6
      "\xc5\x8b"         => "\xc5\x8a",
5271 1
      "\xc5\x88"         => "\xc5\x87",
5272 1
      "\xc5\x86"         => "\xc5\x85",
5273 6
      "\xc5\x84"         => "\xc5\x83",
5274
      "\xc5\x82"         => "\xc5\x81",
5275
      "\xc5\x80"         => "\xc4\xbf",
5276 6
      "\xc4\xbe"         => "\xc4\xbd",
5277
      "\xc4\xbc"         => "\xc4\xbb",
5278 6
      "\xc4\xba"         => "\xc4\xb9",
5279
      "\xc4\xb7"         => "\xc4\xb6",
5280
      "\xc4\xb5"         => "\xc4\xb4",
5281
      "\xc4\xb3"         => "\xc4\xb2",
5282 1
      "\xc4\xb1"         => "\x49",
5283 1
      "\xc4\xaf"         => "\xc4\xae",
5284 1
      "\xc4\xad"         => "\xc4\xac",
5285 6
      "\xc4\xab"         => "\xc4\xaa",
5286 6
      "\xc4\xa9"         => "\xc4\xa8",
5287 6
      "\xc4\xa7"         => "\xc4\xa6",
5288 6
      "\xc4\xa5"         => "\xc4\xa4",
5289 6
      "\xc4\xa3"         => "\xc4\xa2",
5290
      "\xc4\xa1"         => "\xc4\xa0",
5291 6
      "\xc4\x9f"         => "\xc4\x9e",
5292
      "\xc4\x9d"         => "\xc4\x9c",
5293
      "\xc4\x9b"         => "\xc4\x9a",
5294
      "\xc4\x99"         => "\xc4\x98",
5295
      "\xc4\x97"         => "\xc4\x96",
5296
      "\xc4\x95"         => "\xc4\x94",
5297
      "\xc4\x93"         => "\xc4\x92",
5298
      "\xc4\x91"         => "\xc4\x90",
5299
      "\xc4\x8f"         => "\xc4\x8e",
5300
      "\xc4\x8d"         => "\xc4\x8c",
5301
      "\xc4\x8b"         => "\xc4\x8a",
5302
      "\xc4\x89"         => "\xc4\x88",
5303
      "\xc4\x87"         => "\xc4\x86",
5304 1
      "\xc4\x85"         => "\xc4\x84",
5305
      "\xc4\x83"         => "\xc4\x82",
5306 1
      "\xc4\x81"         => "\xc4\x80",
5307 1
      "\xc3\xbf"         => "\xc5\xb8",
5308
      "\xc3\xbe"         => "\xc3\x9e",
5309
      "\xc3\xbd"         => "\xc3\x9d",
5310
      "\xc3\xbc"         => "\xc3\x9c",
5311
      "\xc3\xbb"         => "\xc3\x9b",
5312
      "\xc3\xba"         => "\xc3\x9a",
5313
      "\xc3\xb9"         => "\xc3\x99",
5314
      "\xc3\xb8"         => "\xc3\x98",
5315
      "\xc3\xb6"         => "\xc3\x96",
5316
      "\xc3\xb5"         => "\xc3\x95",
5317
      "\xc3\xb4"         => "\xc3\x94",
5318
      "\xc3\xb3"         => "\xc3\x93",
5319
      "\xc3\xb2"         => "\xc3\x92",
5320 1
      "\xc3\xb1"         => "\xc3\x91",
5321
      "\xc3\xb0"         => "\xc3\x90",
5322 1
      "\xc3\xaf"         => "\xc3\x8f",
5323
      "\xc3\xae"         => "\xc3\x8e",
5324
      "\xc3\xad"         => "\xc3\x8d",
5325
      "\xc3\xac"         => "\xc3\x8c",
5326
      "\xc3\xab"         => "\xc3\x8b",
5327
      "\xc3\xaa"         => "\xc3\x8a",
5328
      "\xc3\xa9"         => "\xc3\x89",
5329
      "\xc3\xa8"         => "\xc3\x88",
5330
      "\xc3\xa7"         => "\xc3\x87",
5331
      "\xc3\xa6"         => "\xc3\x86",
5332
      "\xc3\xa5"         => "\xc3\x85",
5333
      "\xc3\xa4"         => "\xc3\x84",
5334
      "\xc3\xa3"         => "\xc3\x83",
5335
      "\xc3\xa2"         => "\xc3\x82",
5336
      "\xc3\xa1"         => "\xc3\x81",
5337
      "\xc3\xa0"         => "\xc3\x80",
5338
      "\xc2\xb5"         => "\xce\x9c",
5339
      "\x7a"             => "\x5a",
5340
      "\x79"             => "\x59",
5341
      "\x78"             => "\x58",
5342
      "\x77"             => "\x57",
5343
      "\x76"             => "\x56",
5344
      "\x75"             => "\x55",
5345
      "\x74"             => "\x54",
5346
      "\x73"             => "\x53",
5347
      "\x72"             => "\x52",
5348
      "\x71"             => "\x51",
5349
      "\x70"             => "\x50",
5350
      "\x6f"             => "\x4f",
5351
      "\x6e"             => "\x4e",
5352
      "\x6d"             => "\x4d",
5353
      "\x6c"             => "\x4c",
5354
      "\x6b"             => "\x4b",
5355 12
      "\x6a"             => "\x4a",
5356
      "\x69"             => "\x49",
5357 12
      "\x68"             => "\x48",
5358
      "\x67"             => "\x47",
5359
      "\x66"             => "\x46",
5360
      "\x65"             => "\x45",
5361
      "\x64"             => "\x44",
5362
      "\x63"             => "\x43",
5363
      "\x62"             => "\x42",
5364
      "\x61"             => "\x41",
5365
5366
    );
5367
5368
    return $case;
5369
  }
5370 13
5371
  /**
   * Translate characters or replace sub-strings.
   *
   * With three arguments, every character of $from is replaced by the character at the same
   * position in $to; if both lists differ in length, the surplus characters of the longer
   * one are ignored. With two arguments, $from has to be an array of "search => replace"
   * pairs, exactly as for the two-argument form of the native strtr().
   *
   * @param string       $s    The string being translated.
   * @param string|array $from The characters to replace, or the replace-pairs if $to is omitted.
   * @param string|float $to   The replacement characters; the default (INF) only marks "not given".
   *
   * @return string
   */
  public static function strtr($s, $from, $to = INF)
  {
    if (INF !== $to) {
      // like the native three-argument form: an empty list means there is nothing to translate
      // (and an empty-string key would make the native two-argument form fail)
      if ('' === $from || '' === $to) {
        return (string)$s;
      }

      $from = self::str_split($from);
      $to = self::str_split($to);

      // only the first "min(count($from), count($to))" characters take part in the translation
      $length = min(count($from), count($to));

      $from = array_combine(
          array_slice($from, 0, $length),
          array_slice($to, 0, $length)
      );
    }

    return strtr($s, $from);
  }
5399
5400
  /**
   * Compare a part of "$main_str" (from an offset, up to length characters) with "$str".
   *
   * The part of $main_str is cut out first, then $str is shortened to the length of that part
   * and both pieces are compared.
   *
   * @param string $main_str           The main string being compared.
   * @param string $str                The secondary string being compared.
   * @param int    $offset             The start position for the comparison. If negative, it starts counting from
   *                                   the end of the string.
   * @param int    $length             The maximum length of the part taken from main_str.
   * @param bool   $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5420
5421
  /**
   * Case-insensitive string comparison: both strings are case-folded and then compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5433
5434
  /**
   * Uppercase the first character of all words in the string.
   *
   * Words are separated by a single space (" "). Words listed in $exceptions are left untouched,
   * but the very first character of the result is upper-cased in any case.
   *
   * @param string $str        The input string.
   * @param array  $exceptions Words (exact, case-sensitive matches) that must not be changed.
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // only a really empty string means "nothing to do" ("0" is a valid word)
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (explode(' ', $str) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    // the start of the string is upper-cased even if the first word is an exception
    return self::ucfirst(implode(' ', $newwords));
  }
5475
5476
  /**
   * Format a number with grouped thousands.
   *
   * Separators that are longer than one byte (e.g. multi-byte UTF-8 characters) are supported on
   * every PHP version: the number is formatted with the ASCII separators "." and "," first and
   * both are swapped for the requested separators afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() replaces both placeholders in a single pass, so a separator that itself
      // contains "." or "," is never replaced a second time
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
5506
5507
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: this is only a wrapper for the native "str_replace()", all arguments are passed through unchanged.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for (the needle); an array designates multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement for the found search values; an array designates multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on (the haystack). If it is an array,
   *                       every entry is processed and an array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed A string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
5541
5542
  /**
   * Case-insensitive version of "str_replace()", implemented with UTF-8 aware regular expressions.
   *
   * The replacement is always inserted literally: "$" and "\" have no special meaning, as with
   * the native str_ireplace(). An empty search value never matches.
   *
   * @param string|array $search  The value(s) being searched for.
   * @param string|array $replace The replacement value(s).
   * @param string|array $subject The string or array being searched and replaced on.
   * @param int          $count   [optional] If passed, this will hold the number of replacements.
   *
   * @return string|array|null The result of preg_replace(): the replaced value(s), or null if a regex error occurred.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // this pattern cannot match anything: an empty needle must not replace anything
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // escape "\" and "$", otherwise preg_replace() would treat e.g. "$1" or "\0" as back-references
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $value) {
        $replacement[] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    return preg_replace($patterns, $replacement, $subject, -1, $count);
  }
5570
5571 1
  /**
   * Makes the first character of a string lowercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function lcfirst($str)
  {
    $firstChar = self::substr($str, 0, 1);
    $loweredFirstChar = self::strtolower($firstChar);

    return $loweredFirstChar . self::substr($str, 1);
  }
5582
5583 1
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Both strings are lower-cased and then handed over to "UTF8::strrpos()".
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int|false The result of "UTF8::strrpos()" for the lower-cased strings
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
5596 1
5597 1
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string $haystack     <p>
   *                             The string being checked, for the last occurrence
   *                             of needle
   *                             </p>
   * @param string $needle       <p>
   *                             The string to find in haystack; other values are used in their string form.
   *                             </p>
   * @param int    $offset       [optional] May be specified to begin searching an arbitrary number of characters
   *                             into the string. Negative values will stop searching at an arbitrary point
   *                             prior to the end of the string.
   * @param bool   $cleanUtf8    Clean non UTF-8 chars from the string
   *
   * @return int|false the numeric position of
   * the last occurrence of needle in the
   * haystack string. If
   * needle is not found (or one of the strings is empty), it returns false.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // grapheme_strrpos() is provided by "intl" (not by "iconv"), so check for the function itself
    // NOTE: it reports the position in grapheme units
    if (function_exists('grapheme_strrpos') === true) {
      return grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $pos = strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // "$pos" is a byte position: count the characters in front of the match instead
    $left = substr($haystack, 0, $pos);

    // a negative offset only shortened the haystack, it does not shift the position
    return ($offset > 0 ? $offset : 0) + self::strlen($left);
  }
5670
5671
  /**
   * Splits a string into smaller chunks and joins the chunks with the specified
   * line ending character(s).
   *
   * NOTE: the line ending is only placed between the chunks, it is not appended after the last chunk.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
5685 2
5686
  /**
   * Alias for "UTF8::to_win1252()": the input is passed through unchanged.
   *
   * @param   string $str
   *
   * @return  array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
5697
5698 1
  /**
   * Fix UTF-8 strings with wrongly converted Windows-1252 chars.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as if it was ISO8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This is only a wrapper around "UTF8::fix_simple_utf8()".
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
5715
5716
  /**
   * Returns the table of Unicode White Space characters that is stored in this class.
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespaceTable = self::$whitespace;

    return $whitespaceTable;
  }
5725 1
5726 1
  /**
   * Parses the string into variables.
   *
   * WARNING: This differs from the one-argument form of parse_str(): the variables are always
   *    stored in $result (passed by reference) and never placed in the local scope!
   *
   * The input is passed through "UTF8::filter()" before it is parsed.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string.
   *                        </p>
   * @param array  $result  <p>
   *                        The parsed variables are stored in this variable as array elements.
   *                        </p>
   *
   * @return bool The return value of mb_parse_str(): true on success, false on failure.
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    $str = self::filter($str);

    // hand the status back to the caller instead of silently dropping it
    return mb_parse_str($str, $result);
  }
5753
5754
  /**
   * Get the text direction of a specific character.
   *
   * The code point of the character is looked up in fixed tables of single code points and
   * code point ranges; every character that is not listed there is reported as 'LTR'.
   *
   * @param   string $chr Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($chr)
  {
    // 'RTL' code points up to 0x85e: single values ...
    static $lowSingles = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6, 0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa, 0x81a, 0x824, 0x828, 0x85e,
    );

    // ... and inclusive ranges (first, last)
    static $lowRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
    );

    // 'RTL' code points from 0xfb1d upwards: single values ...
    static $highSingles = array(
        0xfb1d, 0xfb3e, 0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // ... and inclusive ranges (first, last)
    static $highRanges = array(
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    $c = static::chr_to_decimal($chr);

    // everything outside of 0x5be - 0x10b7f is left-to-right
    if ($c < 0x5be || $c > 0x10b7f) {
      return 'LTR';
    }

    // pick the table that belongs to the segment of the code point
    if ($c <= 0x85e) {
      $singles = $lowSingles;
      $ranges = $lowRanges;
    } elseif ($c === 0x200f) {
      return 'RTL';
    } elseif ($c >= 0xfb1d) {
      $singles = $highSingles;
      $ranges = $highRanges;
    } else {
      return 'LTR';
    }

    if (in_array($c, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
5853
5854
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes (1 to 4) belong to the character; the payload bits of
   * the lead byte and of the following bytes are then combined into one number.
   *
   * @param   string $chr The input character
   *
   * @return  int The decoded number, 0 for an empty string.
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    if (!isset($chr[0])) {
      // nothing to decode (and no access to a non-existing string offset)
      return 0;
    }

    $code = self::ord($chr[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    $bytes = 1;

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx - a missing byte of a truncated sequence contributes no bits
      $byte = isset($chr[$i]) ? self::ord($chr[$i]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
5893
5894
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity ("&#x...;") which is then converted
   * to UTF-8 via mb_convert_encoding().
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
5911
5912
  /**
5913
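   * Returns an array that maps URL-encoded Windows-1252 bytes ("%20" - "%FF") to their characters.
   *
   * NOTE(review): "%80" is mapped to a backtick here, although 0x80 is the euro sign in Windows-1252 - confirm.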
5914
   *
5915
   * @return array
5916
   */
5917
  protected static function urldecode_fix_win1252_chars()
5918
  {
5919
    static $array = array(
5920
        '%20' => ' ',
5921
        '%21' => '!',
5922
        '%22' => '"',
5923
        '%23' => '#',
5924
        '%24' => '$',
5925
        '%25' => '%',
5926
        '%26' => '&',
5927
        '%27' => "'",
5928
        '%28' => '(',
5929
        '%29' => ')',
5930
        '%2A' => '*',
5931
        '%2B' => '+',
5932
        '%2C' => ',',
5933
        '%2D' => '-',
5934
        '%2E' => '.',
5935
        '%2F' => '/',
5936
        '%30' => '0',
5937
        '%31' => '1',
5938
        '%32' => '2',
5939
        '%33' => '3',
5940
        '%34' => '4',
5941
        '%35' => '5',
5942
        '%36' => '6',
5943
        '%37' => '7',
5944
        '%38' => '8',
5945
        '%39' => '9',
5946
        '%3A' => ':',
5947
        '%3B' => ';',
5948
        '%3C' => '<',
5949
        '%3D' => '=',
5950
        '%3E' => '>',
5951
        '%3F' => '?',
5952
        '%40' => '@',
5953
        '%41' => 'A',
5954
        '%42' => 'B',
5955
        '%43' => 'C',
5956
        '%44' => 'D',
5957
        '%45' => 'E',
5958
        '%46' => 'F',
5959
        '%47' => 'G',
5960
        '%48' => 'H',
5961
        '%49' => 'I',
5962
        '%4A' => 'J',
5963
        '%4B' => 'K',
5964
        '%4C' => 'L',
5965
        '%4D' => 'M',
5966
        '%4E' => 'N',
5967
        '%4F' => 'O',
5968
        '%50' => 'P',
5969
        '%51' => 'Q',
5970
        '%52' => 'R',
5971
        '%53' => 'S',
5972
        '%54' => 'T',
5973
        '%55' => 'U',
5974
        '%56' => 'V',
5975
        '%57' => 'W',
5976
        '%58' => 'X',
5977
        '%59' => 'Y',
5978
        '%5A' => 'Z',
5979
        '%5B' => '[',
5980
        '%5C' => '\\',
5981
        '%5D' => ']',
5982
        '%5E' => '^',
5983
        '%5F' => '_',
5984
        '%60' => '`',
5985
        '%61' => 'a',
5986
        '%62' => 'b',
5987
        '%63' => 'c',
5988
        '%64' => 'd',
5989
        '%65' => 'e',
5990
        '%66' => 'f',
5991
        '%67' => 'g',
5992
        '%68' => 'h',
5993
        '%69' => 'i',
5994
        '%6A' => 'j',
5995
        '%6B' => 'k',
5996
        '%6C' => 'l',
5997
        '%6D' => 'm',
5998
        '%6E' => 'n',
5999
        '%6F' => 'o',
6000
        '%70' => 'p',
6001
        '%71' => 'q',
6002
        '%72' => 'r',
6003
        '%73' => 's',
6004
        '%74' => 't',
6005
        '%75' => 'u',
6006
        '%76' => 'v',
6007
        '%77' => 'w',
6008
        '%78' => 'x',
6009
        '%79' => 'y',
6010
        '%7A' => 'z',
6011
        '%7B' => '{',
6012
        '%7C' => '|',
6013
        '%7D' => '}',
6014
        '%7E' => '~',
6015
        '%7F' => '',
6016
        '%80' => '`',
6017
        '%81' => '',
6018
        '%82' => '‚',
6019
        '%83' => 'ƒ',
6020
        '%84' => '„',
6021
        '%85' => '…',
6022
        '%86' => '†',
6023
        '%87' => '‡',
6024
        '%88' => 'ˆ',
6025
        '%89' => '‰',
6026
        '%8A' => 'Š',
6027
        '%8B' => '‹',
6028
        '%8C' => 'Œ',
6029
        '%8D' => '',
6030
        '%8E' => 'Ž',
6031
        '%8F' => '',
6032
        '%90' => '',
6033
        '%91' => '‘',
6034
        '%92' => '’',
6035
        '%93' => '“',
6036
        '%94' => '”',
6037
        '%95' => '•',
6038
        '%96' => '–',
6039
        '%97' => '—',
6040
        '%98' => '˜',
6041
        '%99' => '™',
6042
        '%9A' => 'š',
6043
        '%9B' => '›',
6044
        '%9C' => 'œ',
6045
        '%9D' => '',
6046
        '%9E' => 'ž',
6047
        '%9F' => 'Ÿ',
6048
        '%A0' => '',
6049
        '%A1' => '¡',
6050
        '%A2' => '¢',
6051
        '%A3' => '£',
6052
        '%A4' => '¤',
6053
        '%A5' => '¥',
6054
        '%A6' => '¦',
6055
        '%A7' => '§',
6056
        '%A8' => '¨',
6057
        '%A9' => '©',
6058
        '%AA' => 'ª',
6059
        '%AB' => '«',
6060
        '%AC' => '¬',
6061
        '%AD' => '',
6062
        '%AE' => '®',
6063
        '%AF' => '¯',
6064
        '%B0' => '°',
6065
        '%B1' => '±',
6066
        '%B2' => '²',
6067
        '%B3' => '³',
6068
        '%B4' => '´',
6069
        '%B5' => 'µ',
6070
        '%B6' => '¶',
6071
        '%B7' => '·',
6072
        '%B8' => '¸',
6073
        '%B9' => '¹',
6074
        '%BA' => 'º',
6075
        '%BB' => '»',
6076
        '%BC' => '¼',
6077
        '%BD' => '½',
6078
        '%BE' => '¾',
6079
        '%BF' => '¿',
6080
        '%C0' => 'À',
6081
        '%C1' => 'Á',
6082
        '%C2' => 'Â',
6083
        '%C3' => 'Ã',
6084
        '%C4' => 'Ä',
6085
        '%C5' => 'Å',
6086
        '%C6' => 'Æ',
6087
        '%C7' => 'Ç',
6088
        '%C8' => 'È',
6089
        '%C9' => 'É',
6090
        '%CA' => 'Ê',
6091
        '%CB' => 'Ë',
6092
        '%CC' => 'Ì',
6093
        '%CD' => 'Í',
6094
        '%CE' => 'Î',
6095
        '%CF' => 'Ï',
6096
        '%D0' => 'Ð',
6097
        '%D1' => 'Ñ',
6098
        '%D2' => 'Ò',
6099
        '%D3' => 'Ó',
6100
        '%D4' => 'Ô',
6101
        '%D5' => 'Õ',
6102
        '%D6' => 'Ö',
6103
        '%D7' => '×',
6104
        '%D8' => 'Ø',
6105
        '%D9' => 'Ù',
6106
        '%DA' => 'Ú',
6107
        '%DB' => 'Û',
6108
        '%DC' => 'Ü',
6109
        '%DD' => 'Ý',
6110
        '%DE' => 'Þ',
6111
        '%DF' => 'ß',
6112
        '%E0' => 'à',
6113
        '%E1' => 'á',
6114
        '%E2' => 'â',
6115
        '%E3' => 'ã',
6116
        '%E4' => 'ä',
6117
        '%E5' => 'å',
6118
        '%E6' => 'æ',
6119
        '%E7' => 'ç',
6120
        '%E8' => 'è',
6121
        '%E9' => 'é',
6122
        '%EA' => 'ê',
6123
        '%EB' => 'ë',
6124
        '%EC' => 'ì',
6125
        '%ED' => 'í',
6126
        '%EE' => 'î',
6127
        '%EF' => 'ï',
6128
        '%F0' => 'ð',
6129
        '%F1' => 'ñ',
6130
        '%F2' => 'ò',
6131
        '%F3' => 'ó',
6132
        '%F4' => 'ô',
6133
        '%F5' => 'õ',
6134
        '%F6' => 'ö',
6135
        '%F7' => '÷',
6136
        '%F8' => 'ø',
6137
        '%F9' => 'ù',
6138
        '%FA' => 'ú',
6139
        '%FB' => 'û',
6140
        '%FC' => 'ü',
6141
        '%FD' => 'ý',
6142
        '%FE' => 'þ',
6143
        '%FF' => 'ÿ',
6144
    );
6145
6146
    return $array;
6147
  }
6148
6149
}
6150