Completed
Push — master ( cd0bff...6028a2 )
by Lars
06:23
created

UTF8   D

Complexity

Total Complexity 668

Size/Duplication

Total Lines 6618
Duplicated Lines 4.76 %

Coupling/Cohesion

Components 3
Dependencies 3

Test Coverage

Coverage 78.52%

Importance

Changes 62
Bugs 15 Features 12
Metric Value
wmc 668
c 62
b 15
f 12
lcom 3
cbo 3
dl 315
loc 6618
ccs 1276
cts 1625
cp 0.7852
rs 4.4102

145 Methods

Rating   Name   Duplication   Size   Complexity  
A checkForSupport() 0 10 2
A __construct() 0 4 1
A access() 0 6 1
A add_bom_to_string() 0 8 2
A bom() 0 4 1
A callback() 0 4 1
B case_table() 0 1001 1
A chr() 0 13 3
A chr_map() 0 6 1
A chr_size_list() 0 8 2
B chr_to_decimal() 0 32 6
A chr_to_hex() 0 4 1
A chunk_split() 0 4 1
B clean() 0 35 4
A cleanup() 0 20 2
B codepoints() 0 26 3
A count_chars() 0 8 1
A decimal_to_chr() 0 10 1
A encode() 0 14 3
C file_get_contents() 0 49 8
A file_has_bom() 0 4 1
C filter() 34 43 13
A filter_input() 10 10 2
A filter_input_array() 10 10 2
A filter_var() 10 10 2
A filter_var_array() 10 10 2
A fits_inside() 0 4 1
A fix_simple_utf8() 0 18 3
A fix_utf8() 0 20 4
D getCharDirection() 0 92 115
A getData() 0 9 2
B hash() 0 40 5
A hex_to_int() 0 8 2
A html_encode() 0 12 1
B html_entity_decode() 0 36 6
A entityCallback() 0 12 2
A htmlentities() 0 4 1
A htmlspecialchars() 0 4 1
A iconv_loaded() 0 4 2
A int_to_hex() 0 12 3
A intl_loaded() 0 4 2
A isAscii() 0 4 1
A isBase64() 0 4 1
A isBom() 0 4 1
A isJson() 0 18 4
A isUtf8() 0 4 1
A is_ascii() 0 4 1
A is_base64() 0 14 3
B is_binary() 0 17 5
A is_binary_file() 0 12 2
A is_bom() 0 4 1
C is_utf16() 47 47 14
C is_utf32() 47 47 14
D is_utf8() 21 124 22
A json_decode() 0 12 2
A json_encode() 0 12 2
A lcfirst() 0 4 1
A ltrim() 12 12 3
A max() 8 8 2
A max_chr_width() 0 9 2
A mbstring_loaded() 0 10 2
A min() 8 8 2
B normalizeEncoding() 0 28 2
A normalize_msword() 0 12 2
B normalize_whitespace() 0 29 5
A number_format() 0 20 4
C ord() 0 23 9
A parse_str() 0 9 1
A pcre_utf8_support() 0 5 1
D range() 14 38 9
C removeBOM() 13 40 12
A remove_duplicates() 0 14 4
A remove_invisible_characters() 0 20 3
A replace_diamond_question_mark() 0 14 1
A rtrim() 12 12 3
C rxClass() 0 36 7
A showSupport() 0 6 2
A single_chr_html_encode() 0 8 2
C split() 12 69 22
C str_detect_encoding() 0 65 13
A str_ireplace() 0 18 3
B str_limit_after_word() 0 29 5
C str_pad() 9 35 7
A str_repeat() 0 6 1
A str_replace() 0 4 1
A str_shuffle() 0 8 1
A str_sort() 0 16 3
C str_split() 0 41 7
A str_to_binary() 0 19 3
F str_transliterate() 6 92 21
B str_word_count() 0 25 5
A strcasecmp() 0 4 1
A strcmp() 0 7 2
B strcspn() 0 19 5
A string() 0 12 1
A string_has_bom() 0 4 1
A strip_tags() 0 7 1
B stripos() 0 24 6
A stristr() 0 11 2
B strlen() 0 22 6
A strnatcasecmp() 0 4 1
A strnatcmp() 0 4 2
A strncasecmp() 0 4 1
A strncmp() 0 4 1
A strpbrk() 0 8 2
C strpos() 6 54 13
A strrchr() 0 6 1
A strrev() 0 4 1
A strrichr() 0 6 1
A strripos() 0 4 1
C strrpos() 6 51 12
A strspn() 0 8 4
A strstr() 0 6 1
B strtocasefold() 0 27 4
A strtolower() 0 13 2
A strtonatfold() 0 4 1
B strtoupper() 0 31 4
A strtr() 0 19 4
A strwidth() 0 7 1
C substr() 0 46 8
A substr_compare() 0 7 2
A substr_count() 0 16 4
C substr_replace() 0 66 14
B swapCase() 0 26 3
A toAscii() 0 4 1
A toLatin1() 0 4 1
A toUTF8() 0 4 1
C to_ascii() 0 71 15
A to_iso8859() 0 4 1
A to_latin1() 0 4 1
D to_utf8() 20 106 26
A to_win1252() 0 16 4
A trim() 0 15 4
A ucfirst() 0 4 1
A ucword() 0 4 1
C ucwords() 0 33 7
A urldecode() 0 23 3
B urldecode_fix_win1252_chars() 0 231 1
A utf8_decode() 0 23 3
A utf8_encode() 0 19 3
A utf8_fix_win1252_chars() 0 4 1
A whitespace_table() 0 4 1
A words_limit() 0 20 4
C wordwrap() 0 56 13
A ws() 0 4 1

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

Complex Class

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This often can reduce the size of classes significantly.

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Intl\Normalizer\Normalizer;
7
use Symfony\Polyfill\Xml\Xml;
8
9
/**
10
 * UTF8-Helper-Class
11
 *
12
 * @package voku\helper
13
 */
14
class UTF8
15
{
16
  /**
17
   * @var array
18
   */
19
  protected static $win1252ToUtf8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
  );
48
49
  /**
50
   * @var array
51
   */
52
  protected static $cp1252ToUtf8 = array(
53
      '€' => '€',
54
      '‚' => '‚',
55
      'ƒ' => 'ƒ',
56
      '„' => '„',
57
      '…' => '…',
58
      '†' => '†',
59
      '‡' => '‡',
60
      'ˆ' => 'ˆ',
61
      '‰' => '‰',
62
      'Š' => 'Š',
63
      '‹' => '‹',
64
      'Œ' => 'Œ',
65
      'Ž' => 'Ž',
66
      '‘' => '‘',
67
      '’' => '’',
68
      '“' => '“',
69
      '”' => '”',
70
      '•' => '•',
71
      '–' => '–',
72
      '—' => '—',
73
      '˜' => '˜',
74
      '™' => '™',
75
      'š' => 'š',
76
      '›' => '›',
77
      'œ' => 'œ',
78
      'ž' => 'ž',
79
      'Ÿ' => 'Ÿ',
80
  );
81
82
  /**
83
   * Numeric code point => UTF-8 Character
84
   *
85
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
86
   *
87
   * @var array
88
   */
89
  protected static $whitespace = array(
90
    // NUL Byte
91
    0     => "\x0",
92
    // Tab
93
    9     => "\x9",
94
    // New Line
95
    10    => "\xa",
96
    // Vertical Tab
97
    11    => "\xb",
98
    // Carriage Return
99
    13    => "\xd",
100
    // Ordinary Space
101
    32    => "\x20",
102
    // NO-BREAK SPACE
103
    160   => "\xc2\xa0",
104
    // OGHAM SPACE MARK
105
    5760  => "\xe1\x9a\x80",
106
    // MONGOLIAN VOWEL SEPARATOR
107
    6158  => "\xe1\xa0\x8e",
108
    // EN QUAD
109
    8192  => "\xe2\x80\x80",
110
    // EM QUAD
111
    8193  => "\xe2\x80\x81",
112
    // EN SPACE
113
    8194  => "\xe2\x80\x82",
114
    // EM SPACE
115
    8195  => "\xe2\x80\x83",
116
    // THREE-PER-EM SPACE
117
    8196  => "\xe2\x80\x84",
118
    // FOUR-PER-EM SPACE
119
    8197  => "\xe2\x80\x85",
120
    // SIX-PER-EM SPACE
121
    8198  => "\xe2\x80\x86",
122
    // FIGURE SPACE
123
    8199  => "\xe2\x80\x87",
124
    // PUNCTUATION SPACE
125
    8200  => "\xe2\x80\x88",
126
    // THIN SPACE
127
    8201  => "\xe2\x80\x89",
128
    // HAIR SPACE
129
    8202  => "\xe2\x80\x8a",
130
    // LINE SEPARATOR
131
    8232  => "\xe2\x80\xa8",
132
    // PARAGRAPH SEPARATOR
133
    8233  => "\xe2\x80\xa9",
134
    // NARROW NO-BREAK SPACE
135
    8239  => "\xe2\x80\xaf",
136
    // MEDIUM MATHEMATICAL SPACE
137
    8287  => "\xe2\x81\x9f",
138
    // IDEOGRAPHIC SPACE
139
    12288 => "\xe3\x80\x80",
140
  );
141
142
  /**
143
   * @var array
144
   */
145
  protected static $whitespaceTable = array(
146
      'SPACE'                     => "\x20",
147
      'NO-BREAK SPACE'            => "\xc2\xa0",
148
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
149
      'EN QUAD'                   => "\xe2\x80\x80",
150
      'EM QUAD'                   => "\xe2\x80\x81",
151
      'EN SPACE'                  => "\xe2\x80\x82",
152
      'EM SPACE'                  => "\xe2\x80\x83",
153
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
154
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
155
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
156
      'FIGURE SPACE'              => "\xe2\x80\x87",
157
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
158
      'THIN SPACE'                => "\xe2\x80\x89",
159
      'HAIR SPACE'                => "\xe2\x80\x8a",
160
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
161
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
162
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
163
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
164
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
165
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
166
  );
167
168
  /**
169
   * bidirectional text chars
170
   *
171
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
172
   *
173
   * @var array
174
   */
175
  protected static $bidiUniCodeControlsTable = array(
176
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
177
    8234 => "\xE2\x80\xAA",
178
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
179
    8235 => "\xE2\x80\xAB",
180
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
181
    8236 => "\xE2\x80\xAC",
182
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
183
    8237 => "\xE2\x80\xAD",
184
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
185
    8238 => "\xE2\x80\xAE",
186
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
187
    8294 => "\xE2\x81\xA6",
188
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
189
    8295 => "\xE2\x81\xA7",
190
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
191
    8296 => "\xE2\x81\xA8",
192
    // POP DIRECTIONAL ISOLATE
193
    8297 => "\xE2\x81\xA9",
194
  );
195
196
  /**
197
   * @var array
198
   */
199
  protected static $commonCaseFold = array(
200
      'ſ'            => 's',
201
      "\xCD\x85"     => 'ι',
202
      'ς'            => 'σ',
203
      "\xCF\x90"     => 'β',
204
      "\xCF\x91"     => 'θ',
205
      "\xCF\x95"     => 'φ',
206
      "\xCF\x96"     => 'π',
207
      "\xCF\xB0"     => 'κ',
208
      "\xCF\xB1"     => 'ρ',
209
      "\xCF\xB5"     => 'ε',
210
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
211
      "\xE1\xBE\xBE" => 'ι',
212
  );
213
214
  /**
215
   * @var array
216
   */
217
  protected static $brokenUtf8ToUtf8 = array(
218
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
219
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
220
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
221
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
222
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
223
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
224
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
225
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
226
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
227
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
228
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
229
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
230
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
231
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
232
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
233
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
234
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
235
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
236
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
237
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
238
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
239
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
240
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
241
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
242
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
243
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
244
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
245
      'ü'       => 'ü',
246
      'ä'       => 'ä',
247
      'ö'       => 'ö',
248
      'Ö'       => 'Ö',
249
      'ß'       => 'ß',
250
      'Ã '       => 'à',
251
      'á'       => 'á',
252
      'â'       => 'â',
253
      'ã'       => 'ã',
254
      'ù'       => 'ù',
255
      'ú'       => 'ú',
256
      'û'       => 'û',
257
      'Ù'       => 'Ù',
258
      'Ú'       => 'Ú',
259
      'Û'       => 'Û',
260
      'Ü'       => 'Ü',
261
      'ò'       => 'ò',
262
      'ó'       => 'ó',
263
      'ô'       => 'ô',
264
      'è'       => 'è',
265
      'é'       => 'é',
266
      'ê'       => 'ê',
267
      'ë'       => 'ë',
268
      'À'       => 'À',
269
      'Á'       => 'Á',
270
      'Â'       => 'Â',
271
      'Ã'       => 'Ã',
272
      'Ä'       => 'Ä',
273
      'Ã…'       => 'Å',
274
      'Ç'       => 'Ç',
275
      'È'       => 'È',
276
      'É'       => 'É',
277
      'Ê'       => 'Ê',
278
      'Ë'       => 'Ë',
279
      'ÃŒ'       => 'Ì',
280
      'Í'       => 'Í',
281
      'ÃŽ'       => 'Î',
282
      'Ï'       => 'Ï',
283
      'Ñ'       => 'Ñ',
284
      'Ã’'       => 'Ò',
285
      'Ó'       => 'Ó',
286
      'Ô'       => 'Ô',
287
      'Õ'       => 'Õ',
288
      'Ø'       => 'Ø',
289
      'Ã¥'       => 'å',
290
      'æ'       => 'æ',
291
      'ç'       => 'ç',
292
      'ì'       => 'ì',
293
      'í'       => 'í',
294
      'î'       => 'î',
295
      'ï'       => 'ï',
296
      'ð'       => 'ð',
297
      'ñ'       => 'ñ',
298
      'õ'       => 'õ',
299
      'ø'       => 'ø',
300
      'ý'       => 'ý',
301
      'ÿ'       => 'ÿ',
302
      '€'      => '€',
303
  );
304
305
  /**
306
   * @var array
307
   */
308
  protected static $utf8ToWin1252 = array(
309
      "\xe2\x82\xac" => "\x80", // EURO SIGN
310
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
311
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
312
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
313
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
314
      "\xe2\x80\xa0" => "\x86", // DAGGER
315
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
316
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
317
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
318
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
319
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
320
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
321
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
322
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
323
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
324
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
325
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
326
      "\xe2\x80\xa2" => "\x95", // BULLET
327
      "\xe2\x80\x93" => "\x96", // EN DASH
328
      "\xe2\x80\x94" => "\x97", // EM DASH
329
      "\xcb\x9c"     => "\x98", // SMALL TILDE
330
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
331
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
332
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
333
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
334
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
335
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
336
  );
337
338
  /**
339
   * @var array
340
   */
341
  protected static $utf8MSWord = array(
342
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
343
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
344
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
345
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
346
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
347
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
348
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
349
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
350
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
351
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
352
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
353
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
354
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
355
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
356
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
357
  );
358
359
  protected static $iconvEncoding = array(
360
      'ANSI_X3.4-1968',
361
      'ANSI_X3.4-1986',
362
      'ASCII',
363
      'CP367',
364
      'IBM367',
365
      'ISO-IR-6',
366
      'ISO646-US',
367
      'ISO_646.IRV:1991',
368
      'US',
369
      'US-ASCII',
370
      'CSASCII',
371
      'UTF-8',
372
      'ISO-10646-UCS-2',
373
      'UCS-2',
374
      'CSUNICODE',
375
      'UCS-2BE',
376
      'UNICODE-1-1',
377
      'UNICODEBIG',
378
      'CSUNICODE11',
379
      'UCS-2LE',
380
      'UNICODELITTLE',
381
      'ISO-10646-UCS-4',
382
      'UCS-4',
383
      'CSUCS4',
384
      'UCS-4BE',
385
      'UCS-4LE',
386
      'UTF-16',
387
      'UTF-16BE',
388
      'UTF-16LE',
389
      'UTF-32',
390
      'UTF-32BE',
391
      'UTF-32LE',
392
      'UNICODE-1-1-UTF-7',
393
      'UTF-7',
394
      'CSUNICODE11UTF7',
395
      'UCS-2-INTERNAL',
396
      'UCS-2-SWAPPED',
397
      'UCS-4-INTERNAL',
398
      'UCS-4-SWAPPED',
399
      'C99',
400
      'JAVA',
401
      'CP819',
402
      'IBM819',
403
      'ISO-8859-1',
404
      'ISO-IR-100',
405
      'ISO8859-1',
406
      'ISO_8859-1',
407
      'ISO_8859-1:1987',
408
      'L1',
409
      'LATIN1',
410
      'CSISOLATIN1',
411
      'ISO-8859-2',
412
      'ISO-IR-101',
413
      'ISO8859-2',
414
      'ISO_8859-2',
415
      'ISO_8859-2:1987',
416
      'L2',
417
      'LATIN2',
418
      'CSISOLATIN2',
419
      'ISO-8859-3',
420
      'ISO-IR-109',
421
      'ISO8859-3',
422
      'ISO_8859-3',
423
      'ISO_8859-3:1988',
424
      'L3',
425
      'LATIN3',
426
      'CSISOLATIN3',
427
      'ISO-8859-4',
428
      'ISO-IR-110',
429
      'ISO8859-4',
430
      'ISO_8859-4',
431
      'ISO_8859-4:1988',
432
      'L4',
433
      'LATIN4',
434
      'CSISOLATIN4',
435
      'CYRILLIC',
436
      'ISO-8859-5',
437
      'ISO-IR-144',
438
      'ISO8859-5',
439
      'ISO_8859-5',
440
      'ISO_8859-5:1988',
441
      'CSISOLATINCYRILLIC',
442
      'ARABIC',
443
      'ASMO-708',
444
      'ECMA-114',
445
      'ISO-8859-6',
446
      'ISO-IR-127',
447
      'ISO8859-6',
448
      'ISO_8859-6',
449
      'ISO_8859-6:1987',
450
      'CSISOLATINARABIC',
451
      'ECMA-118',
452
      'ELOT_928',
453
      'GREEK',
454
      'GREEK8',
455
      'ISO-8859-7',
456
      'ISO-IR-126',
457
      'ISO8859-7',
458
      'ISO_8859-7',
459
      'ISO_8859-7:1987',
460
      'ISO_8859-7:2003',
461
      'CSISOLATINGREEK',
462
      'HEBREW',
463
      'ISO-8859-8',
464
      'ISO-IR-138',
465
      'ISO8859-8',
466
      'ISO_8859-8',
467
      'ISO_8859-8:1988',
468
      'CSISOLATINHEBREW',
469
      'ISO-8859-9',
470
      'ISO-IR-148',
471
      'ISO8859-9',
472
      'ISO_8859-9',
473
      'ISO_8859-9:1989',
474
      'L5',
475
      'LATIN5',
476
      'CSISOLATIN5',
477
      'ISO-8859-10',
478
      'ISO-IR-157',
479
      'ISO8859-10',
480
      'ISO_8859-10',
481
      'ISO_8859-10:1992',
482
      'L6',
483
      'LATIN6',
484
      'CSISOLATIN6',
485
      'ISO-8859-11',
486
      'ISO8859-11',
487
      'ISO_8859-11',
488
      'ISO-8859-13',
489
      'ISO-IR-179',
490
      'ISO8859-13',
491
      'ISO_8859-13',
492
      'L7',
493
      'LATIN7',
494
      'ISO-8859-14',
495
      'ISO-CELTIC',
496
      'ISO-IR-199',
497
      'ISO8859-14',
498
      'ISO_8859-14',
499
      'ISO_8859-14:1998',
500
      'L8',
501
      'LATIN8',
502
      'ISO-8859-15',
503
      'ISO-IR-203',
504
      'ISO8859-15',
505
      'ISO_8859-15',
506
      'ISO_8859-15:1998',
507
      'LATIN-9',
508
      'ISO-8859-16',
509
      'ISO-IR-226',
510
      'ISO8859-16',
511
      'ISO_8859-16',
512
      'ISO_8859-16:2001',
513
      'L10',
514
      'LATIN10',
515
      'KOI8-R',
516
      'CSKOI8R',
517
      'KOI8-U',
518
      'KOI8-RU',
519
      'CP1250',
520
      'MS-EE',
521
      'WINDOWS-1250',
522
      'CP1251',
523
      'MS-CYRL',
524
      'WINDOWS-1251',
525
      'CP1252',
526
      'MS-ANSI',
527
      'WINDOWS-1252',
528
      'CP1253',
529
      'MS-GREEK',
530
      'WINDOWS-1253',
531
      'CP1254',
532
      'MS-TURK',
533
      'WINDOWS-1254',
534
      'CP1255',
535
      'MS-HEBR',
536
      'WINDOWS-1255',
537
      'CP1256',
538
      'MS-ARAB',
539
      'WINDOWS-1256',
540
      'CP1257',
541
      'WINBALTRIM',
542
      'WINDOWS-1257',
543
      'CP1258',
544
      'WINDOWS-1258',
545
      '850',
546
      'CP850',
547
      'IBM850',
548
      'CSPC850MULTILINGUAL',
549
      '862',
550
      'CP862',
551
      'IBM862',
552
      'CSPC862LATINHEBREW',
553
      '866',
554
      'CP866',
555
      'IBM866',
556
      'CSIBM866',
557
      'MAC',
558
      'MACINTOSH',
559
      'MACROMAN',
560
      'CSMACINTOSH',
561
      'MACCENTRALEUROPE',
562
      'MACICELAND',
563
      'MACCROATIAN',
564
      'MACROMANIA',
565
      'MACCYRILLIC',
566
      'MACUKRAINE',
567
      'MACGREEK',
568
      'MACTURKISH',
569
      'MACHEBREW',
570
      'MACARABIC',
571
      'MACTHAI',
572
      'HP-ROMAN8',
573
      'R8',
574
      'ROMAN8',
575
      'CSHPROMAN8',
576
      'NEXTSTEP',
577
      'ARMSCII-8',
578
      'GEORGIAN-ACADEMY',
579
      'GEORGIAN-PS',
580
      'KOI8-T',
581
      'CP154',
582
      'CYRILLIC-ASIAN',
583
      'PT154',
584
      'PTCP154',
585
      'CSPTCP154',
586
      'KZ-1048',
587
      'RK1048',
588
      'STRK1048-2002',
589
      'CSKZ1048',
590
      'MULELAO-1',
591
      'CP1133',
592
      'IBM-CP1133',
593
      'ISO-IR-166',
594
      'TIS-620',
595
      'TIS620',
596
      'TIS620-0',
597
      'TIS620.2529-1',
598
      'TIS620.2533-0',
599
      'TIS620.2533-1',
600
      'CP874',
601
      'WINDOWS-874',
602
      'VISCII',
603
      'VISCII1.1-1',
604
      'CSVISCII',
605
      'TCVN',
606
      'TCVN-5712',
607
      'TCVN5712-1',
608
      'TCVN5712-1:1993',
609
      'ISO-IR-14',
610
      'ISO646-JP',
611
      'JIS_C6220-1969-RO',
612
      'JP',
613
      'CSISO14JISC6220RO',
614
      'JISX0201-1976',
615
      'JIS_X0201',
616
      'X0201',
617
      'CSHALFWIDTHKATAKANA',
618
      'ISO-IR-87',
619
      'JIS0208',
620
      'JIS_C6226-1983',
621
      'JIS_X0208',
622
      'JIS_X0208-1983',
623
      'JIS_X0208-1990',
624
      'X0208',
625
      'CSISO87JISX0208',
626
      'ISO-IR-159',
627
      'JIS_X0212',
628
      'JIS_X0212-1990',
629
      'JIS_X0212.1990-0',
630
      'X0212',
631
      'CSISO159JISX02121990',
632
      'CN',
633
      'GB_1988-80',
634
      'ISO-IR-57',
635
      'ISO646-CN',
636
      'CSISO57GB1988',
637
      'CHINESE',
638
      'GB_2312-80',
639
      'ISO-IR-58',
640
      'CSISO58GB231280',
641
      'CN-GB-ISOIR165',
642
      'ISO-IR-165',
643
      'ISO-IR-149',
644
      'KOREAN',
645
      'KSC_5601',
646
      'KS_C_5601-1987',
647
      'KS_C_5601-1989',
648
      'CSKSC56011987',
649
      'EUC-JP',
650
      'EUCJP',
651
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
652
      'CSEUCPKDFMTJAPANESE',
653
      'MS_KANJI',
654
      'SHIFT-JIS',
655
      'SHIFT_JIS',
656
      'SJIS',
657
      'CSSHIFTJIS',
658
      'CP932',
659
      'ISO-2022-JP',
660
      'CSISO2022JP',
661
      'ISO-2022-JP-1',
662
      'ISO-2022-JP-2',
663
      'CSISO2022JP2',
664
      'CN-GB',
665
      'EUC-CN',
666
      'EUCCN',
667
      'GB2312',
668
      'CSGB2312',
669
      'GBK',
670
      'CP936',
671
      'MS936',
672
      'WINDOWS-936',
673
      'GB18030',
674
      'ISO-2022-CN',
675
      'CSISO2022CN',
676
      'ISO-2022-CN-EXT',
677
      'HZ',
678
      'HZ-GB-2312',
679
      'EUC-TW',
680
      'EUCTW',
681
      'CSEUCTW',
682
      'BIG-5',
683
      'BIG-FIVE',
684
      'BIG5',
685
      'BIGFIVE',
686
      'CN-BIG5',
687
      'CSBIG5',
688
      'CP950',
689
      'BIG5-HKSCS:1999',
690
      'BIG5-HKSCS:2001',
691
      'BIG5-HKSCS',
692
      'BIG5-HKSCS:2004',
693
      'BIG5HKSCS',
694
      'EUC-KR',
695
      'EUCKR',
696
      'CSEUCKR',
697
      'CP949',
698
      'UHC',
699
      'CP1361',
700
      'JOHAB',
701
      'ISO-2022-KR',
702
      'CSISO2022KR',
703
      'CP856',
704
      'CP922',
705
      'CP943',
706
      'CP1046',
707
      'CP1124',
708
      'CP1129',
709
      'CP1161',
710
      'IBM-1161',
711
      'IBM1161',
712
      'CSIBM1161',
713
      'CP1162',
714
      'IBM-1162',
715
      'IBM1162',
716
      'CSIBM1162',
717
      'CP1163',
718
      'IBM-1163',
719
      'IBM1163',
720
      'CSIBM1163',
721
      'DEC-KANJI',
722
      'DEC-HANYU',
723
      '437',
724
      'CP437',
725
      'IBM437',
726
      'CSPC8CODEPAGE437',
727
      'CP737',
728
      'CP775',
729
      'IBM775',
730
      'CSPC775BALTIC',
731
      '852',
732
      'CP852',
733
      'IBM852',
734
      'CSPCP852',
735
      'CP853',
736
      '855',
737
      'CP855',
738
      'IBM855',
739
      'CSIBM855',
740
      '857',
741
      'CP857',
742
      'IBM857',
743
      'CSIBM857',
744
      'CP858',
745
      '860',
746
      'CP860',
747
      'IBM860',
748
      'CSIBM860',
749
      '861',
750
      'CP-IS',
751
      'CP861',
752
      'IBM861',
753
      'CSIBM861',
754
      '863',
755
      'CP863',
756
      'IBM863',
757
      'CSIBM863',
758
      'CP864',
759
      'IBM864',
760
      'CSIBM864',
761
      '865',
762
      'CP865',
763
      'IBM865',
764
      'CSIBM865',
765
      '869',
766
      'CP-GR',
767
      'CP869',
768
      'IBM869',
769
      'CSIBM869',
770
      'CP1125',
771
      'EUC-JISX0213',
772
      'SHIFT_JISX0213',
773
      'ISO-2022-JP-3',
774
      'BIG5-2003',
775
      'ISO-IR-230',
776
      'TDS565',
777
      'ATARI',
778
      'ATARIST',
779
      'RISCOS-LATIN1',
780
  );
781
782
  /**
783
   * @var array
784
   */
785
  private static $support = array();
786
787
  /**
   * Creates a UTF8 helper instance.
   *
   * Takes no arguments; the only work done during construction is the
   * call to self::checkForSupport().
   */
  public function __construct()
  {
    self::checkForSupport();
  }
794
795
  /**
   * Fetches one character of a UTF-8 string by its position.
   *
   * Offers $str[$pos]-like access by delegating to self::substr() with a
   * fixed length of 1.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of the character to return.
   *
   * @return   string Single Multi-Byte character.
   */
  public static function access($str, $pos)
  {
    // Exactly one character is requested, starting at $pos.
    $length = 1;
    $character = self::substr($str, $pos, $length);

    return $character;
  }
809
810
  /**
   * Makes sure the given string starts with the Byte Order Mark.
   *
   * The first three bytes of the input are handed to self::is_bom(); when
   * that check succeeds the input is returned untouched, otherwise
   * self::bom() is prepended.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that contains BOM
   */
  public static function add_bom_to_string($str)
  {
    // Only the leading three bytes are inspected for an existing BOM.
    $leadingBytes = substr($str, 0, 3);

    if (self::is_bom($leadingBytes)) {
      return $str;
    }

    return self::bom() . $str;
  }
827
828
  /**
   * Provides the three-byte sequence EF BB BF used here as the Byte Order Mark.
   *
   * @return   string Byte Order Mark
   */
  public static function bom()
  {
    $byteOrderMark = "\xEF\xBB\xBF";

    return $byteOrderMark;
  }
837
838
  /**
   * Alias of UTF8::chr_map().
   *
   * Both arguments are forwarded unchanged, in the same order, and the
   * result of chr_map() is returned as-is.
   *
   * @param $callback Passed through as the first argument of chr_map().
   * @param $str      Passed through as the second argument of chr_map().
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
850
851
  /**
852
   * Returns an array of all lower and upper case UTF-8 encoded characters.
853
   *
854
   * @return   array An array with lower case chars as keys and upper chars as values.
855
   */
856
  protected static function case_table()
857
  {
858
    static $case = array(
859
860
      // lower => upper
861
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
862
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
863
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
864
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
865
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
866
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
867
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
868
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
869
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
870
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
871
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
872
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
873
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
874
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
875
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
876
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
877
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
878
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
879
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
880
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
881
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
882
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
883
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
884
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
885
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
886
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
887
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
888
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
889
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
890
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
891
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
892
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
893
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
894
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
895
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
896
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
897
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
898
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
899
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
900
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
901
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
902
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
903
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
904
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
905
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
906
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
907
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
908
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
909
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
910
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
911
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
912
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
913
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
914
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
915
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
916
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
917
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
918
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
919
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
920
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
921
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
922
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
923
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
924
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
925
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
926
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
927
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
928
      "\xea\x9e\x87"     => "\xea\x9e\x86",
929
      "\xea\x9e\x85"     => "\xea\x9e\x84",
930
      "\xea\x9e\x83"     => "\xea\x9e\x82",
931
      "\xea\x9e\x81"     => "\xea\x9e\x80",
932
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
933
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
934
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
935
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
936
      "\xea\x9d\xad"     => "\xea\x9d\xac",
937
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
938
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
939
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
940
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
941
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
942
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
943
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
944
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
945
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
946
      "\xea\x9d\x99"     => "\xea\x9d\x98",
947
      "\xea\x9d\x97"     => "\xea\x9d\x96",
948
      "\xea\x9d\x95"     => "\xea\x9d\x94",
949
      "\xea\x9d\x93"     => "\xea\x9d\x92",
950
      "\xea\x9d\x91"     => "\xea\x9d\x90",
951
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
952
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
953
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
954
      "\xea\x9d\x89"     => "\xea\x9d\x88",
955
      "\xea\x9d\x87"     => "\xea\x9d\x86",
956
      "\xea\x9d\x85"     => "\xea\x9d\x84",
957
      "\xea\x9d\x83"     => "\xea\x9d\x82",
958
      "\xea\x9d\x81"     => "\xea\x9d\x80",
959
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
960
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
961
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
962
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
963
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
964
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
965
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
966
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
967
      "\xea\x9c\xad"     => "\xea\x9c\xac",
968
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
969
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
970
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
971
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
972
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
973
      "\xea\x9a\x97"     => "\xea\x9a\x96",
974
      "\xea\x9a\x95"     => "\xea\x9a\x94",
975
      "\xea\x9a\x93"     => "\xea\x9a\x92",
976
      "\xea\x9a\x91"     => "\xea\x9a\x90",
977
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
978
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
979
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
980
      "\xea\x9a\x89"     => "\xea\x9a\x88",
981
      "\xea\x9a\x87"     => "\xea\x9a\x86",
982
      "\xea\x9a\x85"     => "\xea\x9a\x84",
983
      "\xea\x9a\x83"     => "\xea\x9a\x82",
984
      "\xea\x9a\x81"     => "\xea\x9a\x80",
985
      "\xea\x99\xad"     => "\xea\x99\xac",
986
      "\xea\x99\xab"     => "\xea\x99\xaa",
987
      "\xea\x99\xa9"     => "\xea\x99\xa8",
988
      "\xea\x99\xa7"     => "\xea\x99\xa6",
989
      "\xea\x99\xa5"     => "\xea\x99\xa4",
990
      "\xea\x99\xa3"     => "\xea\x99\xa2",
991
      "\xea\x99\x9f"     => "\xea\x99\x9e",
992
      "\xea\x99\x9d"     => "\xea\x99\x9c",
993
      "\xea\x99\x9b"     => "\xea\x99\x9a",
994
      "\xea\x99\x99"     => "\xea\x99\x98",
995
      "\xea\x99\x97"     => "\xea\x99\x96",
996
      "\xea\x99\x95"     => "\xea\x99\x94",
997
      "\xea\x99\x93"     => "\xea\x99\x92",
998
      "\xea\x99\x91"     => "\xea\x99\x90",
999
      "\xea\x99\x8f"     => "\xea\x99\x8e",
1000
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1001
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1002
      "\xea\x99\x89"     => "\xea\x99\x88",
1003
      "\xea\x99\x87"     => "\xea\x99\x86",
1004
      "\xea\x99\x85"     => "\xea\x99\x84",
1005
      "\xea\x99\x83"     => "\xea\x99\x82",
1006
      "\xea\x99\x81"     => "\xea\x99\x80",
1007
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1008
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1009
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1010
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1011
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1012
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1013
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1014
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1015
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1016
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1017
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1018
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1019
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1020
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1021
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1022
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1023
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1024
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1025
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1026
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1027
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1028
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1029
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1030
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1031
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1032
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1033
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1034
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1035
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1036
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1037
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1038
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1039
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1040
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1041
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1042
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1043
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1044
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1045
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1046
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1047
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1048
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1049
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1050
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1051
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1052
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1053
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1054
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1055
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1056
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1057
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1058
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1059
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1060
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1061
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1062
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1063
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1064
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1065
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1066
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1067
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1068
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1069
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1070
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1071
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1072
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1073
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1074
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1075
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1076
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1077
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1078
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1079
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1080
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1081
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1082
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1083
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1084
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1085
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1086
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1087
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1088
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1089
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1090
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1091
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1092
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1093
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1094
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1095
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1096
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1097
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1098
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1099
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1100
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1101
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1102
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1103
      "\xe2\xb1\xa5"     => "\xc8\xba",
1104
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1105
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1106
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1107
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1108
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1109
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1110
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1111
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1112
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1113
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1114
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1115
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1116
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1117
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1118
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1119
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1120
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1121
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1122
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1123
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1124
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1125
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1126
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1127
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1128
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1129
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1130
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1131
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1132
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1133
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1134
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1135
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1136
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1137
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1138
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1139
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1140
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1141
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1142
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1143
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1144
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1145
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1146
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1147
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1148
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1149
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1150
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1151
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1152
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1153
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1154
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1155
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1156
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1157
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1158
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1159
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1160
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1161
      "\xe1\xbe\xbe"     => "\xce\x99",
1162
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1163
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1164
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1165
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1166
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1167
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1168
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1169
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1170
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1171
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1172
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1173
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1174
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1175
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1176
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1177
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1178
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1179
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1180
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1181
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1182
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1183
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1184
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1185
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1186
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1187
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1188
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1189
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1190
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1191
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1192
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1193
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1194
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1195
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1196
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1197
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1198
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1199
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1200
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1201
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1202
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1203
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1204
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1205
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1206
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1207
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1208
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1209
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1210
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1211
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1212
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1213
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1214
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1215
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1216
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1217
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1218
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1219
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1220
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1221
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1222
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1223
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1224
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1225
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1226
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1227
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1228
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1229
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1230
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1231
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1232
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1233
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1234
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1235
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1236
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1237
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1238
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1239
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1240
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1241
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1242
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1243
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1244
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1245
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1246
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1247
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1248
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1249
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1250
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1251
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1252
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1253
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1254
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1255
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1256
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1257
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1258
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1259
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1260
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1261
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1262
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1263
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1264
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1265
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1266
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1267
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1268
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1269
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1270
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1271
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1272
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1273
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1274
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1275
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1276
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1277
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1278
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1279
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1280
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1281
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1282
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1283
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1284
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1285
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1286
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1287
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1288
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1289
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1290
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1291
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1292
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1293
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1294
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1295
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1296
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1297
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1298
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1299
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1300
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1301
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1302
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1303
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1304
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1305
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1306
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1307
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1308
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1309
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1310
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1311
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1312
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1313
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1314
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1315
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1316
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1317
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1318
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1319
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1320
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1321
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1322
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1323
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1324
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1325
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1326
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1327
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1328
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1329
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1330
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1331
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1332
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1333
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1334
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1335
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1336
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1337
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1338
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1339
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1340
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1341
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1342
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1343
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1344
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1345
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1346
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1347
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1348
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1349
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1350
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1351
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1352
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1353
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1354
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1355
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1356
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1357
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1358
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1359
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1360
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1361
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1362
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1363
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1364
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1365
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1366
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1367
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1368
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1369
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1370
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1371
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1372
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1373
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1374
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1375
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1376
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1377
      "\xd6\x86"         => "\xd5\x96",
1378
      "\xd6\x85"         => "\xd5\x95",
1379
      "\xd6\x84"         => "\xd5\x94",
1380
      "\xd6\x83"         => "\xd5\x93",
1381
      "\xd6\x82"         => "\xd5\x92",
1382
      "\xd6\x81"         => "\xd5\x91",
1383
      "\xd6\x80"         => "\xd5\x90",
1384
      "\xd5\xbf"         => "\xd5\x8f",
1385
      "\xd5\xbe"         => "\xd5\x8e",
1386
      "\xd5\xbd"         => "\xd5\x8d",
1387
      "\xd5\xbc"         => "\xd5\x8c",
1388
      "\xd5\xbb"         => "\xd5\x8b",
1389
      "\xd5\xba"         => "\xd5\x8a",
1390
      "\xd5\xb9"         => "\xd5\x89",
1391
      "\xd5\xb8"         => "\xd5\x88",
1392
      "\xd5\xb7"         => "\xd5\x87",
1393
      "\xd5\xb6"         => "\xd5\x86",
1394
      "\xd5\xb5"         => "\xd5\x85",
1395
      "\xd5\xb4"         => "\xd5\x84",
1396
      "\xd5\xb3"         => "\xd5\x83",
1397
      "\xd5\xb2"         => "\xd5\x82",
1398
      "\xd5\xb1"         => "\xd5\x81",
1399
      "\xd5\xb0"         => "\xd5\x80",
1400
      "\xd5\xaf"         => "\xd4\xbf",
1401
      "\xd5\xae"         => "\xd4\xbe",
1402
      "\xd5\xad"         => "\xd4\xbd",
1403
      "\xd5\xac"         => "\xd4\xbc",
1404
      "\xd5\xab"         => "\xd4\xbb",
1405
      "\xd5\xaa"         => "\xd4\xba",
1406
      "\xd5\xa9"         => "\xd4\xb9",
1407
      "\xd5\xa8"         => "\xd4\xb8",
1408
      "\xd5\xa7"         => "\xd4\xb7",
1409
      "\xd5\xa6"         => "\xd4\xb6",
1410
      "\xd5\xa5"         => "\xd4\xb5",
1411
      "\xd5\xa4"         => "\xd4\xb4",
1412
      "\xd5\xa3"         => "\xd4\xb3",
1413
      "\xd5\xa2"         => "\xd4\xb2",
1414
      "\xd5\xa1"         => "\xd4\xb1",
1415
      "\xd4\xa5"         => "\xd4\xa4",
1416
      "\xd4\xa3"         => "\xd4\xa2",
1417
      "\xd4\xa1"         => "\xd4\xa0",
1418
      "\xd4\x9f"         => "\xd4\x9e",
1419
      "\xd4\x9d"         => "\xd4\x9c",
1420
      "\xd4\x9b"         => "\xd4\x9a",
1421
      "\xd4\x99"         => "\xd4\x98",
1422
      "\xd4\x97"         => "\xd4\x96",
1423
      "\xd4\x95"         => "\xd4\x94",
1424
      "\xd4\x93"         => "\xd4\x92",
1425
      "\xd4\x91"         => "\xd4\x90",
1426
      "\xd4\x8f"         => "\xd4\x8e",
1427
      "\xd4\x8d"         => "\xd4\x8c",
1428
      "\xd4\x8b"         => "\xd4\x8a",
1429
      "\xd4\x89"         => "\xd4\x88",
1430
      "\xd4\x87"         => "\xd4\x86",
1431
      "\xd4\x85"         => "\xd4\x84",
1432
      "\xd4\x83"         => "\xd4\x82",
1433
      "\xd4\x81"         => "\xd4\x80",
1434
      "\xd3\xbf"         => "\xd3\xbe",
1435
      "\xd3\xbd"         => "\xd3\xbc",
1436
      "\xd3\xbb"         => "\xd3\xba",
1437
      "\xd3\xb9"         => "\xd3\xb8",
1438
      "\xd3\xb7"         => "\xd3\xb6",
1439
      "\xd3\xb5"         => "\xd3\xb4",
1440
      "\xd3\xb3"         => "\xd3\xb2",
1441
      "\xd3\xb1"         => "\xd3\xb0",
1442
      "\xd3\xaf"         => "\xd3\xae",
1443
      "\xd3\xad"         => "\xd3\xac",
1444
      "\xd3\xab"         => "\xd3\xaa",
1445
      "\xd3\xa9"         => "\xd3\xa8",
1446
      "\xd3\xa7"         => "\xd3\xa6",
1447
      "\xd3\xa5"         => "\xd3\xa4",
1448
      "\xd3\xa3"         => "\xd3\xa2",
1449
      "\xd3\xa1"         => "\xd3\xa0",
1450
      "\xd3\x9f"         => "\xd3\x9e",
1451
      "\xd3\x9d"         => "\xd3\x9c",
1452
      "\xd3\x9b"         => "\xd3\x9a",
1453
      "\xd3\x99"         => "\xd3\x98",
1454
      "\xd3\x97"         => "\xd3\x96",
1455
      "\xd3\x95"         => "\xd3\x94",
1456
      "\xd3\x93"         => "\xd3\x92",
1457
      "\xd3\x91"         => "\xd3\x90",
1458
      "\xd3\x8f"         => "\xd3\x80",
1459
      "\xd3\x8e"         => "\xd3\x8d",
1460
      "\xd3\x8c"         => "\xd3\x8b",
1461
      "\xd3\x8a"         => "\xd3\x89",
1462
      "\xd3\x88"         => "\xd3\x87",
1463
      "\xd3\x86"         => "\xd3\x85",
1464
      "\xd3\x84"         => "\xd3\x83",
1465
      "\xd3\x82"         => "\xd3\x81",
1466
      "\xd2\xbf"         => "\xd2\xbe",
1467
      "\xd2\xbd"         => "\xd2\xbc",
1468
      "\xd2\xbb"         => "\xd2\xba",
1469
      "\xd2\xb9"         => "\xd2\xb8",
1470
      "\xd2\xb7"         => "\xd2\xb6",
1471
      "\xd2\xb5"         => "\xd2\xb4",
1472
      "\xd2\xb3"         => "\xd2\xb2",
1473
      "\xd2\xb1"         => "\xd2\xb0",
1474
      "\xd2\xaf"         => "\xd2\xae",
1475
      "\xd2\xad"         => "\xd2\xac",
1476
      "\xd2\xab"         => "\xd2\xaa",
1477
      "\xd2\xa9"         => "\xd2\xa8",
1478
      "\xd2\xa7"         => "\xd2\xa6",
1479
      "\xd2\xa5"         => "\xd2\xa4",
1480
      "\xd2\xa3"         => "\xd2\xa2",
1481
      "\xd2\xa1"         => "\xd2\xa0",
1482
      "\xd2\x9f"         => "\xd2\x9e",
1483
      "\xd2\x9d"         => "\xd2\x9c",
1484
      "\xd2\x9b"         => "\xd2\x9a",
1485
      "\xd2\x99"         => "\xd2\x98",
1486
      "\xd2\x97"         => "\xd2\x96",
1487
      "\xd2\x95"         => "\xd2\x94",
1488
      "\xd2\x93"         => "\xd2\x92",
1489
      "\xd2\x91"         => "\xd2\x90",
1490
      "\xd2\x8f"         => "\xd2\x8e",
1491
      "\xd2\x8d"         => "\xd2\x8c",
1492
      "\xd2\x8b"         => "\xd2\x8a",
1493
      "\xd2\x81"         => "\xd2\x80",
1494
      "\xd1\xbf"         => "\xd1\xbe",
1495
      "\xd1\xbd"         => "\xd1\xbc",
1496
      "\xd1\xbb"         => "\xd1\xba",
1497
      "\xd1\xb9"         => "\xd1\xb8",
1498
      "\xd1\xb7"         => "\xd1\xb6",
1499
      "\xd1\xb5"         => "\xd1\xb4",
1500
      "\xd1\xb3"         => "\xd1\xb2",
1501
      "\xd1\xb1"         => "\xd1\xb0",
1502
      "\xd1\xaf"         => "\xd1\xae",
1503
      "\xd1\xad"         => "\xd1\xac",
1504
      "\xd1\xab"         => "\xd1\xaa",
1505
      "\xd1\xa9"         => "\xd1\xa8",
1506
      "\xd1\xa7"         => "\xd1\xa6",
1507
      "\xd1\xa5"         => "\xd1\xa4",
1508
      "\xd1\xa3"         => "\xd1\xa2",
1509
      "\xd1\xa1"         => "\xd1\xa0",
1510
      "\xd1\x9f"         => "\xd0\x8f",
1511
      "\xd1\x9e"         => "\xd0\x8e",
1512
      "\xd1\x9d"         => "\xd0\x8d",
1513
      "\xd1\x9c"         => "\xd0\x8c",
1514
      "\xd1\x9b"         => "\xd0\x8b",
1515
      "\xd1\x9a"         => "\xd0\x8a",
1516
      "\xd1\x99"         => "\xd0\x89",
1517
      "\xd1\x98"         => "\xd0\x88",
1518
      "\xd1\x97"         => "\xd0\x87",
1519
      "\xd1\x96"         => "\xd0\x86",
1520
      "\xd1\x95"         => "\xd0\x85",
1521
      "\xd1\x94"         => "\xd0\x84",
1522
      "\xd1\x93"         => "\xd0\x83",
1523
      "\xd1\x92"         => "\xd0\x82",
1524
      "\xd1\x91"         => "\xd0\x81",
1525
      "\xd1\x90"         => "\xd0\x80",
1526
      "\xd1\x8f"         => "\xd0\xaf",
1527
      "\xd1\x8e"         => "\xd0\xae",
1528
      "\xd1\x8d"         => "\xd0\xad",
1529
      "\xd1\x8c"         => "\xd0\xac",
1530
      "\xd1\x8b"         => "\xd0\xab",
1531
      "\xd1\x8a"         => "\xd0\xaa",
1532
      "\xd1\x89"         => "\xd0\xa9",
1533
      "\xd1\x88"         => "\xd0\xa8",
1534
      "\xd1\x87"         => "\xd0\xa7",
1535
      "\xd1\x86"         => "\xd0\xa6",
1536
      "\xd1\x85"         => "\xd0\xa5",
1537
      "\xd1\x84"         => "\xd0\xa4",
1538
      "\xd1\x83"         => "\xd0\xa3",
1539
      "\xd1\x82"         => "\xd0\xa2",
1540
      "\xd1\x81"         => "\xd0\xa1",
1541
      "\xd1\x80"         => "\xd0\xa0",
1542
      "\xd0\xbf"         => "\xd0\x9f",
1543
      "\xd0\xbe"         => "\xd0\x9e",
1544
      "\xd0\xbd"         => "\xd0\x9d",
1545
      "\xd0\xbc"         => "\xd0\x9c",
1546
      "\xd0\xbb"         => "\xd0\x9b",
1547
      "\xd0\xba"         => "\xd0\x9a",
1548
      "\xd0\xb9"         => "\xd0\x99",
1549
      "\xd0\xb8"         => "\xd0\x98",
1550
      "\xd0\xb7"         => "\xd0\x97",
1551
      "\xd0\xb6"         => "\xd0\x96",
1552
      "\xd0\xb5"         => "\xd0\x95",
1553
      "\xd0\xb4"         => "\xd0\x94",
1554
      "\xd0\xb3"         => "\xd0\x93",
1555
      "\xd0\xb2"         => "\xd0\x92",
1556
      "\xd0\xb1"         => "\xd0\x91",
1557
      "\xd0\xb0"         => "\xd0\x90",
1558
      "\xcf\xbb"         => "\xcf\xba",
1559
      "\xcf\xb8"         => "\xcf\xb7",
1560
      "\xcf\xb5"         => "\xce\x95",
1561
      "\xcf\xb2"         => "\xcf\xb9",
1562
      "\xcf\xb1"         => "\xce\xa1",
1563
      "\xcf\xb0"         => "\xce\x9a",
1564
      "\xcf\xaf"         => "\xcf\xae",
1565
      "\xcf\xad"         => "\xcf\xac",
1566
      "\xcf\xab"         => "\xcf\xaa",
1567
      "\xcf\xa9"         => "\xcf\xa8",
1568
      "\xcf\xa7"         => "\xcf\xa6",
1569
      "\xcf\xa5"         => "\xcf\xa4",
1570
      "\xcf\xa3"         => "\xcf\xa2",
1571
      "\xcf\xa1"         => "\xcf\xa0",
1572
      "\xcf\x9f"         => "\xcf\x9e",
1573
      "\xcf\x9d"         => "\xcf\x9c",
1574
      "\xcf\x9b"         => "\xcf\x9a",
1575
      "\xcf\x99"         => "\xcf\x98",
1576
      "\xcf\x97"         => "\xcf\x8f",
1577
      "\xcf\x96"         => "\xce\xa0",
1578
      "\xcf\x95"         => "\xce\xa6",
1579
      "\xcf\x91"         => "\xce\x98",
1580
      "\xcf\x90"         => "\xce\x92",
1581
      "\xcf\x8e"         => "\xce\x8f",
1582
      "\xcf\x8d"         => "\xce\x8e",
1583
      "\xcf\x8c"         => "\xce\x8c",
1584
      "\xcf\x8b"         => "\xce\xab",
1585
      "\xcf\x8a"         => "\xce\xaa",
1586
      "\xcf\x89"         => "\xce\xa9",
1587
      "\xcf\x88"         => "\xce\xa8",
1588
      "\xcf\x87"         => "\xce\xa7",
1589
      "\xcf\x86"         => "\xce\xa6",
1590
      "\xcf\x85"         => "\xce\xa5",
1591
      "\xcf\x84"         => "\xce\xa4",
1592
      "\xcf\x83"         => "\xce\xa3",
1593
      "\xcf\x82"         => "\xce\xa3",
1594
      "\xcf\x81"         => "\xce\xa1",
1595
      "\xcf\x80"         => "\xce\xa0",
1596
      "\xce\xbf"         => "\xce\x9f",
1597
      "\xce\xbe"         => "\xce\x9e",
1598
      "\xce\xbd"         => "\xce\x9d",
1599
      "\xce\xbc"         => "\xce\x9c",
1600
      "\xce\xbb"         => "\xce\x9b",
1601
      "\xce\xba"         => "\xce\x9a",
1602
      "\xce\xb9"         => "\xce\x99",
1603
      "\xce\xb8"         => "\xce\x98",
1604
      "\xce\xb7"         => "\xce\x97",
1605
      "\xce\xb6"         => "\xce\x96",
1606
      "\xce\xb5"         => "\xce\x95",
1607
      "\xce\xb4"         => "\xce\x94",
1608
      "\xce\xb3"         => "\xce\x93",
1609
      "\xce\xb2"         => "\xce\x92",
1610
      "\xce\xb1"         => "\xce\x91",
1611
      "\xce\xaf"         => "\xce\x8a",
1612
      "\xce\xae"         => "\xce\x89",
1613
      "\xce\xad"         => "\xce\x88",
1614
      "\xce\xac"         => "\xce\x86",
1615
      "\xcd\xbd"         => "\xcf\xbf",
1616
      "\xcd\xbc"         => "\xcf\xbe",
1617
      "\xcd\xbb"         => "\xcf\xbd",
1618
      "\xcd\xb7"         => "\xcd\xb6",
1619
      "\xcd\xb3"         => "\xcd\xb2",
1620
      "\xcd\xb1"         => "\xcd\xb0",
1621
      "\xca\x92"         => "\xc6\xb7",
1622
      "\xca\x8c"         => "\xc9\x85",
1623
      "\xca\x8b"         => "\xc6\xb2",
1624
      "\xca\x8a"         => "\xc6\xb1",
1625
      "\xca\x89"         => "\xc9\x84",
1626
      "\xca\x88"         => "\xc6\xae",
1627
      "\xca\x83"         => "\xc6\xa9",
1628
      "\xca\x80"         => "\xc6\xa6",
1629
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1630
      "\xc9\xb5"         => "\xc6\x9f",
1631
      "\xc9\xb2"         => "\xc6\x9d",
1632
      "\xc9\xb1"         => "\xe2\xb1\xae",
1633
      "\xc9\xaf"         => "\xc6\x9c",
1634
      "\xc9\xab"         => "\xe2\xb1\xa2",
1635
      "\xc9\xa9"         => "\xc6\x96",
1636
      "\xc9\xa8"         => "\xc6\x97",
1637
      "\xc9\xa5"         => "\xea\x9e\x8d",
1638
      "\xc9\xa3"         => "\xc6\x94",
1639
      "\xc9\xa0"         => "\xc6\x93",
1640
      "\xc9\x9b"         => "\xc6\x90",
1641
      "\xc9\x99"         => "\xc6\x8f",
1642
      "\xc9\x97"         => "\xc6\x8a",
1643
      "\xc9\x96"         => "\xc6\x89",
1644
      "\xc9\x94"         => "\xc6\x86",
1645
      "\xc9\x93"         => "\xc6\x81",
1646
      "\xc9\x92"         => "\xe2\xb1\xb0",
1647
      "\xc9\x91"         => "\xe2\xb1\xad",
1648
      "\xc9\x90"         => "\xe2\xb1\xaf",
1649
      "\xc9\x8f"         => "\xc9\x8e",
1650
      "\xc9\x8d"         => "\xc9\x8c",
1651
      "\xc9\x8b"         => "\xc9\x8a",
1652
      "\xc9\x89"         => "\xc9\x88",
1653
      "\xc9\x87"         => "\xc9\x86",
1654
      "\xc9\x82"         => "\xc9\x81",
1655
      "\xc9\x80"         => "\xe2\xb1\xbf",
1656
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1657
      "\xc8\xbc"         => "\xc8\xbb",
1658
      "\xc8\xb3"         => "\xc8\xb2",
1659
      "\xc8\xb1"         => "\xc8\xb0",
1660
      "\xc8\xaf"         => "\xc8\xae",
1661
      "\xc8\xad"         => "\xc8\xac",
1662
      "\xc8\xab"         => "\xc8\xaa",
1663
      "\xc8\xa9"         => "\xc8\xa8",
1664
      "\xc8\xa7"         => "\xc8\xa6",
1665
      "\xc8\xa5"         => "\xc8\xa4",
1666
      "\xc8\xa3"         => "\xc8\xa2",
1667
      "\xc8\x9f"         => "\xc8\x9e",
1668
      "\xc8\x9d"         => "\xc8\x9c",
1669
      "\xc8\x9b"         => "\xc8\x9a",
1670
      "\xc8\x99"         => "\xc8\x98",
1671
      "\xc8\x97"         => "\xc8\x96",
1672
      "\xc8\x95"         => "\xc8\x94",
1673
      "\xc8\x93"         => "\xc8\x92",
1674
      "\xc8\x91"         => "\xc8\x90",
1675
      "\xc8\x8f"         => "\xc8\x8e",
1676
      "\xc8\x8d"         => "\xc8\x8c",
1677
      "\xc8\x8b"         => "\xc8\x8a",
1678
      "\xc8\x89"         => "\xc8\x88",
1679
      "\xc8\x87"         => "\xc8\x86",
1680
      "\xc8\x85"         => "\xc8\x84",
1681
      "\xc8\x83"         => "\xc8\x82",
1682
      "\xc8\x81"         => "\xc8\x80",
1683
      "\xc7\xbf"         => "\xc7\xbe",
1684
      "\xc7\xbd"         => "\xc7\xbc",
1685
      "\xc7\xbb"         => "\xc7\xba",
1686
      "\xc7\xb9"         => "\xc7\xb8",
1687
      "\xc7\xb5"         => "\xc7\xb4",
1688
      "\xc7\xb3"         => "\xc7\xb2",
1689
      "\xc7\xaf"         => "\xc7\xae",
1690
      "\xc7\xad"         => "\xc7\xac",
1691
      "\xc7\xab"         => "\xc7\xaa",
1692
      "\xc7\xa9"         => "\xc7\xa8",
1693
      "\xc7\xa7"         => "\xc7\xa6",
1694
      "\xc7\xa5"         => "\xc7\xa4",
1695
      "\xc7\xa3"         => "\xc7\xa2",
1696
      "\xc7\xa1"         => "\xc7\xa0",
1697
      "\xc7\x9f"         => "\xc7\x9e",
1698
      "\xc7\x9d"         => "\xc6\x8e",
1699
      "\xc7\x9c"         => "\xc7\x9b",
1700
      "\xc7\x9a"         => "\xc7\x99",
1701
      "\xc7\x98"         => "\xc7\x97",
1702
      "\xc7\x96"         => "\xc7\x95",
1703
      "\xc7\x94"         => "\xc7\x93",
1704
      "\xc7\x92"         => "\xc7\x91",
1705
      "\xc7\x90"         => "\xc7\x8f",
1706
      "\xc7\x8e"         => "\xc7\x8d",
1707
      "\xc7\x8c"         => "\xc7\x8b",
1708
      "\xc7\x89"         => "\xc7\x88",
1709
      "\xc7\x86"         => "\xc7\x85",
1710
      "\xc6\xbf"         => "\xc7\xb7",
1711
      "\xc6\xbd"         => "\xc6\xbc",
1712
      "\xc6\xb9"         => "\xc6\xb8",
1713
      "\xc6\xb6"         => "\xc6\xb5",
1714
      "\xc6\xb4"         => "\xc6\xb3",
1715
      "\xc6\xb0"         => "\xc6\xaf",
1716
      "\xc6\xad"         => "\xc6\xac",
1717
      "\xc6\xa8"         => "\xc6\xa7",
1718
      "\xc6\xa5"         => "\xc6\xa4",
1719
      "\xc6\xa3"         => "\xc6\xa2",
1720
      "\xc6\xa1"         => "\xc6\xa0",
1721
      "\xc6\x9e"         => "\xc8\xa0",
1722
      "\xc6\x9a"         => "\xc8\xbd",
1723
      "\xc6\x99"         => "\xc6\x98",
1724
      "\xc6\x95"         => "\xc7\xb6",
1725
      "\xc6\x92"         => "\xc6\x91",
1726
      "\xc6\x8c"         => "\xc6\x8b",
1727
      "\xc6\x88"         => "\xc6\x87",
1728
      "\xc6\x85"         => "\xc6\x84",
1729
      "\xc6\x83"         => "\xc6\x82",
1730
      "\xc6\x80"         => "\xc9\x83",
1731
      "\xc5\xbf"         => "\x53",
1732
      "\xc5\xbe"         => "\xc5\xbd",
1733
      "\xc5\xbc"         => "\xc5\xbb",
1734
      "\xc5\xba"         => "\xc5\xb9",
1735
      "\xc5\xb7"         => "\xc5\xb6",
1736
      "\xc5\xb5"         => "\xc5\xb4",
1737
      "\xc5\xb3"         => "\xc5\xb2",
1738
      "\xc5\xb1"         => "\xc5\xb0",
1739
      "\xc5\xaf"         => "\xc5\xae",
1740
      "\xc5\xad"         => "\xc5\xac",
1741
      "\xc5\xab"         => "\xc5\xaa",
1742
      "\xc5\xa9"         => "\xc5\xa8",
1743
      "\xc5\xa7"         => "\xc5\xa6",
1744
      "\xc5\xa5"         => "\xc5\xa4",
1745
      "\xc5\xa3"         => "\xc5\xa2",
1746
      "\xc5\xa1"         => "\xc5\xa0",
1747
      "\xc5\x9f"         => "\xc5\x9e",
1748
      "\xc5\x9d"         => "\xc5\x9c",
1749
      "\xc5\x9b"         => "\xc5\x9a",
1750
      "\xc5\x99"         => "\xc5\x98",
1751
      "\xc5\x97"         => "\xc5\x96",
1752
      "\xc5\x95"         => "\xc5\x94",
1753
      "\xc5\x93"         => "\xc5\x92",
1754
      "\xc5\x91"         => "\xc5\x90",
1755
      "\xc5\x8f"         => "\xc5\x8e",
1756
      "\xc5\x8d"         => "\xc5\x8c",
1757
      "\xc5\x8b"         => "\xc5\x8a",
1758
      "\xc5\x88"         => "\xc5\x87",
1759
      "\xc5\x86"         => "\xc5\x85",
1760
      "\xc5\x84"         => "\xc5\x83",
1761
      "\xc5\x82"         => "\xc5\x81",
1762
      "\xc5\x80"         => "\xc4\xbf",
1763
      "\xc4\xbe"         => "\xc4\xbd",
1764
      "\xc4\xbc"         => "\xc4\xbb",
1765
      "\xc4\xba"         => "\xc4\xb9",
1766
      "\xc4\xb7"         => "\xc4\xb6",
1767
      "\xc4\xb5"         => "\xc4\xb4",
1768
      "\xc4\xb3"         => "\xc4\xb2",
1769
      "\xc4\xb1"         => "\x49",
1770
      "\xc4\xaf"         => "\xc4\xae",
1771
      "\xc4\xad"         => "\xc4\xac",
1772
      "\xc4\xab"         => "\xc4\xaa",
1773
      "\xc4\xa9"         => "\xc4\xa8",
1774
      "\xc4\xa7"         => "\xc4\xa6",
1775
      "\xc4\xa5"         => "\xc4\xa4",
1776
      "\xc4\xa3"         => "\xc4\xa2",
1777
      "\xc4\xa1"         => "\xc4\xa0",
1778
      "\xc4\x9f"         => "\xc4\x9e",
1779
      "\xc4\x9d"         => "\xc4\x9c",
1780
      "\xc4\x9b"         => "\xc4\x9a",
1781
      "\xc4\x99"         => "\xc4\x98",
1782
      "\xc4\x97"         => "\xc4\x96",
1783
      "\xc4\x95"         => "\xc4\x94",
1784
      "\xc4\x93"         => "\xc4\x92",
1785
      "\xc4\x91"         => "\xc4\x90",
1786
      "\xc4\x8f"         => "\xc4\x8e",
1787
      "\xc4\x8d"         => "\xc4\x8c",
1788
      "\xc4\x8b"         => "\xc4\x8a",
1789
      "\xc4\x89"         => "\xc4\x88",
1790
      "\xc4\x87"         => "\xc4\x86",
1791
      "\xc4\x85"         => "\xc4\x84",
1792
      "\xc4\x83"         => "\xc4\x82",
1793
      "\xc4\x81"         => "\xc4\x80",
1794
      "\xc3\xbf"         => "\xc5\xb8",
1795
      "\xc3\xbe"         => "\xc3\x9e",
1796
      "\xc3\xbd"         => "\xc3\x9d",
1797
      "\xc3\xbc"         => "\xc3\x9c",
1798
      "\xc3\xbb"         => "\xc3\x9b",
1799
      "\xc3\xba"         => "\xc3\x9a",
1800
      "\xc3\xb9"         => "\xc3\x99",
1801
      "\xc3\xb8"         => "\xc3\x98",
1802
      "\xc3\xb6"         => "\xc3\x96",
1803
      "\xc3\xb5"         => "\xc3\x95",
1804
      "\xc3\xb4"         => "\xc3\x94",
1805
      "\xc3\xb3"         => "\xc3\x93",
1806
      "\xc3\xb2"         => "\xc3\x92",
1807
      "\xc3\xb1"         => "\xc3\x91",
1808
      "\xc3\xb0"         => "\xc3\x90",
1809
      "\xc3\xaf"         => "\xc3\x8f",
1810
      "\xc3\xae"         => "\xc3\x8e",
1811
      "\xc3\xad"         => "\xc3\x8d",
1812
      "\xc3\xac"         => "\xc3\x8c",
1813
      "\xc3\xab"         => "\xc3\x8b",
1814
      "\xc3\xaa"         => "\xc3\x8a",
1815
      "\xc3\xa9"         => "\xc3\x89",
1816
      "\xc3\xa8"         => "\xc3\x88",
1817
      "\xc3\xa7"         => "\xc3\x87",
1818
      "\xc3\xa6"         => "\xc3\x86",
1819
      "\xc3\xa5"         => "\xc3\x85",
1820
      "\xc3\xa4"         => "\xc3\x84",
1821
      "\xc3\xa3"         => "\xc3\x83",
1822
      "\xc3\xa2"         => "\xc3\x82",
1823
      "\xc3\xa1"         => "\xc3\x81",
1824
      "\xc3\xa0"         => "\xc3\x80",
1825
      "\xc2\xb5"         => "\xce\x9c",
1826
      "\x7a"             => "\x5a",
1827
      "\x79"             => "\x59",
1828
      "\x78"             => "\x58",
1829
      "\x77"             => "\x57",
1830
      "\x76"             => "\x56",
1831
      "\x75"             => "\x55",
1832
      "\x74"             => "\x54",
1833
      "\x73"             => "\x53",
1834
      "\x72"             => "\x52",
1835
      "\x71"             => "\x51",
1836
      "\x70"             => "\x50",
1837
      "\x6f"             => "\x4f",
1838
      "\x6e"             => "\x4e",
1839
      "\x6d"             => "\x4d",
1840
      "\x6c"             => "\x4c",
1841
      "\x6b"             => "\x4b",
1842
      "\x6a"             => "\x4a",
1843
      "\x69"             => "\x49",
1844
      "\x68"             => "\x48",
1845
      "\x67"             => "\x47",
1846
      "\x66"             => "\x46",
1847
      "\x65"             => "\x45",
1848
      "\x64"             => "\x44",
1849
      "\x63"             => "\x43",
1850
      "\x62"             => "\x42",
1851
      "\x61"             => "\x41",
1852
1853
    );
1854
1855
    return $case;
1856
  }
1857
1858
  /**
   * Detects the available UTF-8 related features once and stores the
   * results in the static support-map.
   */
  public static function checkForSupport()
  {
    // the map was already filled by an earlier call
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1871
1872
  /**
   * Builds the UTF-8 encoded character that belongs to a code point.
   *
   * @param    int|string $code_point The code point as integer; every other value is
   *                                  passed through "hex_to_int()" first.
   *
   * @return   string Multi-Byte character, or an empty string if no code point could be extracted.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $number = (int)$code_point;

    if ($number !== $code_point) {
      // not a plain integer, so try to extract the code point from the given value
      $number = (int)self::hex_to_int($code_point);

      if ($number === 0) {
        return '';
      }
    }

    // let the entity decoder produce the character for the numeric entity
    return self::html_entity_decode("&#{$number};", ENT_QUOTES);
  }
1892
1893
  /**
   * Runs a callback on every character of a string.
   *
   * @param    string $callback The callback function.
   * @param    string $str      UTF-8 string whose characters are handed to the callback.
   *
   * @return   array The callback results, one entry per character.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1908
1909
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character, empty for empty input.
   */
  public static function chr_size_list($str)
  {
    // "0" is falsy in PHP but still a valid one-character string,
    // so it must not be handled like empty input
    if (!$str && $str !== '0') {
      return array();
    }

    // "strlen" counts bytes, so each split character yields its byte length
    return array_map('strlen', self::split($str));
  }
1929
1930
  /**
   * Get a decimal code representation of a specific character.
   *
   * Only the first character of the input is decoded. An empty input returns 0
   * and missing continuation bytes of a truncated sequence are treated as 0.
   *
   * @param   string $chr The input character
   *
   * @return  int
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    // nothing to decode, and reading offset 0 would hit an uninitialized string offset
    if (!isset($chr[0])) {
      return 0;
    }

    $code = self::ord($chr[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx (a missing byte of a truncated sequence counts as 0)
      $byte = isset($chr[$i - 1]) ? self::ord($chr[$i - 1]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
1969
1970
  /**
   * Returns the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character
   * @param    string $pfix Prefix that is handed to "int_to_hex()".
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $code_point = self::ord($chr);

    return self::int_to_hex($code_point, $pfix);
  }
1982
1983
  /**
   * Cuts a string into chunks and joins these chunks with the given
   * line ending character(s).
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) that are placed between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1997
1998
  /**
   * Takes a string and strips every byte sequence that is not valid UTF-8.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to also run "removeBOM()"
   * @param bool   $normalize_whitespace    set true, to also run "normalize_whitespace()"
   * @param bool   $normalize_msword        set true, to also run "normalize_msword()", e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // see: http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // (noted as causing connection reset problems on larger strings)

    // group 1 captures runs of valid sequences, the other groups match single
    // invalid bytes, so replacing every match with "$1" keeps the valid runs only
    $utf8_pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($utf8_pattern, '$1', $str);

    // drop the replacement character first, then the invisible characters
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    // the optional steps run in a fixed order: whitespace, msword, bom
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str, true, false, false);
    }

    return $str;
  }
2044
2045
  /**
   * Cleans up a string, so that only printable UTF-8 chars are left at the end.
   *
   * @param string|false $str
   *
   * @return string
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    // empty input needs no clean-up
    if ($str === '') {
      return '';
    }

    // repair ISO <-> UTF-8 errors first
    $repaired = self::fix_simple_utf8($str);

    // then: remove all non UTF-8 symbols
    // + remove the diamond question mark (�)
    // + remove invisible characters (e.g. "\0")
    // + remove the BOM
    // + normalize whitespace chars (but keep non-breaking-spaces)
    return (string)self::clean($repaired, true, true, false, true);
  }
2072
2073
  /**
   * Converts a string (or an array of strings) into an array of Unicode code points.
   *
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool  $u_style If True, the code points are returned in U+xxxx format,
   *                          by default they are returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a string is split into its characters, anything else is used as it is
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $code_points = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $code_points;
    }

    // convert every integer code point into its hex notation
    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $code_points);
  }
2108
2109
  /**
   * Counts how often each character is used in a string.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array with the characters as keys and
   *           their count as values, sorted by key.
   */
  public static function count_chars($str) // there is no $mode parameters
  {
    $chars = self::split($str);
    $counts = array_count_values($chars);

    ksort($counts);

    return $counts;
  }
2125
2126
  /**
   * Returns the UTF-8 character that belongs to a decimal code.
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    // build the hexadecimal numeric entity and let mbstring decode it
    $entity = '&#x' . dechex($code) . ';';

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2143
2144
  /**
   * Encodes a string to UTF8 or LATIN1.
   *
   * INFO:  In contrast to "UTF8::utf8_encode()" this function also tries to fix broken / double encoding,
   *        so it can be called on a string that is already UTF-8 without messing it up.
   *
   * @param string $encodingLabel ISO-8859-1 || UTF-8
   * @param string $str
   *
   * @return false|string Will return false on error (e.g. for every other encoding).
   */
  public static function encode($encodingLabel, $str)
  {
    $target = self::normalizeEncoding($encodingLabel);

    if ($target === 'UTF-8') {
      return self::to_utf8($str);
    }

    // only LATIN1 is left as a supported target
    return $target === 'ISO-8859-1' ? self::to_latin1($str) : false;
  }
2169
2170
  /**
   * Reads entire file into a string and (optionally) converts the data into clean UTF-8.
   *
   * WARNING: do not use the UTF-8 option for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read.
   *                                     </p>
   * @param int|bool|null $flags         [optional] <p>
   *                                     Forwarded (cast to bool) as second argument of the native
   *                                     file_get_contents(); FILE_USE_INCLUDE_PATH can be used
   *                                     to trigger the include path search.
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with
   *                                     stream_context_create. If no context is given and $timeout
   *                                     is not 0, a context with this http-timeout is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts, null is handled as 0.
   *                                     </p>
   * @param int|null      $maxlen        [optional] <p>
   *                                     Maximum length of data read. The default is to read until end
   *                                     of file is reached.
   *                                     </p>
   * @param int           $timeout       [optional] <p>
   *                                     The http-timeout of the default context (see $context),
   *                                     0 disables the default context.
   *                                     </p>
   * @param boolean       $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they
   *                                     use non default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated since PHP 8.1 and rewrites filenames
    // that contain quotes or tags; it is kept here for backward compatibility only
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // the native function expects bool / int here, so the null defaults are converted
    // explicitly instead of relying on the (deprecated) implicit coercion
    $use_include_path = (bool)$flags;
    $offset = (int)$offset;

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // only pass the length if there is one, null would not mean "read everything" on every PHP version
    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $use_include_path, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      $encoding = self::str_detect_encoding($data);

      // strict checks: convert only if a non-empty encoding other than UTF-8 was detected
      if (is_string($encoding) && $encoding !== '' && $encoding !== 'UTF-8') {
        $data = mb_convert_encoding(
            $data,
            'UTF-8',
            self::normalizeEncoding($encoding)
        );
      }

      $data = self::fix_simple_utf8($data);
      $data = self::clean($data, false, true, false, true);
      $data = self::removeBOM($data);
    }

    // clean utf-8 string
    return $data;
  }
2301
2302
  /**
2303
   * Checks if a file starts with BOM character.
2304
   *
2305
   * @param    string $file_path Path to a valid file.
2306
   *
2307
   * @return   bool True if the file has BOM at the start, False otherwise.
2308
   */
2309 1
  public static function file_has_bom($file_path)
  {
    // Read only the first three bytes of the file. The offset has to be 0: since PHP 7.1
    // a negative offset is counted from the END of the stream, so the former "-1" made
    // this read the last byte of the file instead of its beginning.
    $head = file_get_contents($file_path, false, null, 0, 3);

    // An unreadable file cannot be said to start with a BOM.
    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
2313
2314
  /**
2315
   * Normalizes to UTF-8 NFC, converting from CP-1252 when needed.
2316
   *
2317
   * @param mixed  $var
2318
   * @param int    $normalization_form
2319
   * @param string $leading_combining
2320
   *
2321
   * @return mixed
2322
   */
2323 7
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $type = gettype($var);

    // Arrays: filter every element recursively, keeping the keys.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every property that can be iterated from here.
    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Everything that is not a string is handed back untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Strings without any high byte need no normalization.
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (Normalizer::isNormalized($var, $normalization_form)) {
      // Placeholder so that the isset() check below passes.
      $normalized = '-';
    } else {
      $normalized = Normalizer::normalize($var, $normalization_form);

      // Fall back to a re-encoding when the normalizer produced nothing usable.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
2366
2367
  /**
2368
   * "filter_input()" wrapper that normalizes the result to UTF-8 NFC, converting from CP-1252 when needed.
2369
   *
2370
   * @param int    $type
2371
   * @param string $var
2372
   * @param int    $filter
2373
   * @param mixed  $option
2374
   *
2375
   * @return mixed
2376
   */
2377 View Code Duplication
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only when the caller really passed a fourth argument,
    // otherwise the native function is called with three arguments.
    $value = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($value);
  }
2387
2388
  /**
2389
   * "filter_input_array()" wrapper that normalizes the result to UTF-8 NFC, converting from CP-1252 when needed.
2390
   *
2391
   * @param int   $type
2392
   * @param mixed $definition
2393
   * @param bool  $add_empty
2394
   *
2395
   * @return mixed
2396
   */
2397 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without a definition;
    // as soon as a second one was passed, definition and add_empty are both forwarded.
    $values = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
2407
2408
  /**
2409
   * "filter_var()" wrapper that normalizes the result to UTF-8 NFC, converting from CP-1252 when needed.
2410
   *
2411
   * @param mixed $var
2412
   * @param int   $filter
2413
   * @param mixed $option
2414
   *
2415
   * @return mixed
2416
   */
2417 1 View Code Duplication
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only when the caller really passed a third argument,
    // otherwise the native function is called with two arguments.
    $value = func_num_args() < 3
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($value);
  }
2427
2428
  /**
2429
   * "filter_var_array()" wrapper that normalizes the result to UTF-8 NFC, converting from CP-1252 when needed.
2430
   *
2431
   * @param array $data
2432
   * @param mixed $definition
2433
   * @param bool  $add_empty
2434
   *
2435
   * @return mixed
2436
   */
2437 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without a definition;
    // as soon as a second one was passed, definition and add_empty are both forwarded.
    $values = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
2447
2448
  /**
2449
   * Checks if the number of Unicode characters in a string are not
2450
   * more than the specified integer.
2451
   *
2452
   * @param    string $str      The original string to be checked.
2453
   * @param    int    $box_size The size in number of chars to be checked against string.
2454
   *
2455
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
2456
   */
2457 1
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by the class' own strlen() implementation.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2461
2462
  /**
2463
   * Fixing a broken UTF-8 string.
2464
   *
2465
   * @param string $str
2466
   *
2467
   * @return string
2468
   */
2469 8
  public static function fix_simple_utf8($str)
  {
    // Search / replace lists, derived once per request from the class-level map.
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if (null === $search) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2487
2488
  /**
2489
   * Fix a double (or multiple) encoded UTF8 string.
2490
   *
2491
   * @param array|string $str
2492
   *
2493
   * @return array|string The fixed string, or an array of fixed values when an array was given.
2494
   */
2495 1
  public static function fix_utf8($str)
  {
    // Arrays are processed element by element (recursively), keys are kept.
    if (is_array($str)) {
      foreach (array_keys($str) as $key) {
        $str[$key] = self::fix_utf8($str[$key]);
      }

      return $str;
    }

    // Decode and re-encode until one pass leaves the value unchanged.
    // The comparison is deliberately the loose one the method has always used.
    $previous = '';
    while ($previous != $str) {
      $decoded = self::utf8_decode($str);
      $previous = $str;
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
2515
2516
  /**
2517
   * Get the text direction ('RTL' or 'LTR') of a specific character.
2518
   *
2519
   * @param   string $chr Character.
2520
   *
2521
   * @return  string 'RTL' or 'LTR'
2522
   */
2523 1
  public static function getCharDirection($chr)
  {
    // Inclusive code point spans (first, last) that are reported as right-to-left.
    // A span whose first and last value are equal stands for one single code point.

    // Spans consulted for code points up to 0x85e.
    static $lowerSpans = array(
        array(0x5be, 0x5be),
        array(0x5c0, 0x5c0),
        array(0x5c3, 0x5c3),
        array(0x5c6, 0x5c6),
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x608, 0x608),
        array(0x60b, 0x60b),
        array(0x60d, 0x60d),
        array(0x61b, 0x61b),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x710, 0x710),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7b1, 0x7b1),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x7fa, 0x7fa),
        array(0x800, 0x815),
        array(0x81a, 0x81a),
        array(0x824, 0x824),
        array(0x828, 0x828),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0x85e, 0x85e),
    );

    // Spans consulted for code points from 0xfb1d upwards.
    static $upperSpans = array(
        array(0xfb1d, 0xfb1d),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb3e, 0xfb3e),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x10808, 0x10808),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083c, 0x1083c),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x1093f, 0x1093f),
        array(0x10a00, 0x10a00),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    $c = static::chr_to_decimal($chr);

    // Everything outside the overall window covered by the tables is left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Pick the table that belongs to the part of the window the code point is in.
    if (0x85e >= $c) {
      $spans = $lowerSpans;
    } elseif (0x200f === $c) {
      return 'RTL';
    } elseif (0xfb1d <= $c) {
      $spans = $upperSpans;
    } else {
      return 'LTR';
    }

    foreach ($spans as $span) {
      list($first, $last) = $span;

      // Single code points are matched strictly, real spans by inclusive bounds.
      if ($first === $last) {
        $isRtl = ($first === $c);
      } else {
        $isRtl = ($first <= $c && $last >= $c);
      }

      if ($isRtl) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2615
2616
  /**
2617
   * get data from "/data/*.ser"
2618
   *
2619
   * @param string $file
2620
   *
2621
   * @return bool|string|array|int false on error
2622
   */
2623 2
  protected static function getData($file)
  {
    // Serialized data files live in the "data" directory next to this class file.
    $path = __DIR__ . '/data/' . $file . '.ser';

    if (!file_exists($path)) {
      return false;
    }

    return unserialize(file_get_contents($path));
  }
2632
2633
  /**
2634
   * Creates a random string of UTF-8 characters.
2635
   *
2636
   * @param    int $len The length of string in characters.
2637
   *
2638
   * @return   string String consisting of random characters.
2639
   */
2640 1
  public static function hash($len = 8)
  {
    // Alphabet and its size, built once per request.
    static $chars = array();
    static $chars_len = null;

    // Work with a real integer: a fractional length (e.g. 1.5) never becomes exactly
    // zero when decremented and "true" is not decremented at all, so the former
    // countdown loop never terminated for such values.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (empty($chars)) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // Blank out everything that is neither a number nor an upper/lower case letter.
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // Remove only the blanked entries. A bare array_filter() drops every falsy
        // value and therefore also removed the character "0" from the alphabet.
        $chars = array_values(array_filter($chars, 'strlen'));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    // NOTE(review): mt_rand() is not a cryptographically secure generator, so the
    // result must not be used for secrets or tokens.
    $hash = '';
    for ($i = 0; $i < $len; ++$i) {
      $hash .= $chars[mt_rand(0, $chars_len - 1)];
    }

    return $hash;
  }
2680
2681
  /**
2682
   * Converts hexadecimal U+xxxx code point representation to Integer.
2683
   *
2684
   * INFO: opposite to UTF8::int_to_hex( )
2685
   *
2686
   * @param    string $str The hexadecimal code point representation.
2687
   *
2688
   * @return   int The code point, or 0 on failure.
2689
   */
2690
  public static function hex_to_int($str)
  {
    // Accepts an optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
    // Only [0-9a-f] is allowed (case-insensitive): the former [a-z0-9] class also let
    // strings such as "12gh" through, for which intval() converted the leading "12"
    // and returned 18 instead of the documented 0 for invalid input.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2698
2699
  /**
2700
   * Converts a UTF-8 string to a series of HTML numbered entities.
2701
   *
2702
   * e.g.: &#123;&#39;&#1740;
2703
   *
2704
   * @param  string $str The Unicode string to be encoded as numbered entities.
2705
   *
2706
   * @return string HTML numbered entities.
2707
   */
2708 1
  public static function html_encode($str)
  {
    // Split the string into its characters ...
    $characters = self::split($str);

    // ... encode each of them on its own ...
    $entities = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'single_chr_html_encode',
        ),
        $characters
    );

    // ... and glue the pieces back together without a separator.
    return implode('', $entities);
  }
2720
2721
  /**
2722
   * UTF-8 version of html_entity_decode()
2723
   *
2724
   * The reason we are not using html_entity_decode() by itself is because
2725
   * while it is not technically correct to leave out the semicolon
2726
   * at the end of an entity most browsers will still interpret the entity
2727
   * correctly. html_entity_decode() does not convert entities without
2728
   * semicolons, so we are left with our own little solution here. Bummer.
2729
   *
2730
   * Convert all HTML entities to their applicable characters
2731
   *
2732
   * @link http://php.net/manual/en/function.html-entity-decode.php
2733
   *
2734
   * @param string $str      <p>
2735
   *                         The input string.
2736
   *                         </p>
2737
   * @param int    $flags    [optional] <p>
2738
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2739
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2740
   *                         <table>
2741
   *                         Available <i>flags</i> constants
2742
   *                         <tr valign="top">
2743
   *                         <td>Constant Name</td>
2744
   *                         <td>Description</td>
2745
   *                         </tr>
2746
   *                         <tr valign="top">
2747
   *                         <td><b>ENT_COMPAT</b></td>
2748
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2749
   *                         </tr>
2750
   *                         <tr valign="top">
2751
   *                         <td><b>ENT_QUOTES</b></td>
2752
   *                         <td>Will convert both double and single quotes.</td>
2753
   *                         </tr>
2754
   *                         <tr valign="top">
2755
   *                         <td><b>ENT_NOQUOTES</b></td>
2756
   *                         <td>Will leave both double and single quotes unconverted.</td>
2757
   *                         </tr>
2758
   *                         <tr valign="top">
2759
   *                         <td><b>ENT_HTML401</b></td>
2760
   *                         <td>
2761
   *                         Handle code as HTML 4.01.
2762
   *                         </td>
2763
   *                         </tr>
2764
   *                         <tr valign="top">
2765
   *                         <td><b>ENT_XML1</b></td>
2766
   *                         <td>
2767
   *                         Handle code as XML 1.
2768
   *                         </td>
2769
   *                         </tr>
2770
   *                         <tr valign="top">
2771
   *                         <td><b>ENT_XHTML</b></td>
2772
   *                         <td>
2773
   *                         Handle code as XHTML.
2774
   *                         </td>
2775
   *                         </tr>
2776
   *                         <tr valign="top">
2777
   *                         <td><b>ENT_HTML5</b></td>
2778
   *                         <td>
2779
   *                         Handle code as HTML 5.
2780
   *                         </td>
2781
   *                         </tr>
2782
   *                         </table>
2783
   *                         </p>
2784
   * @param string $encoding [optional] <p>
2785
   *                         Encoding to use.
2786
   *                         </p>
2787
   *
2788
   * @return string the decoded string.
2789
   */
2790 15
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Without an ampersand there is no entity to decode.
    if (false === strpos($str, '&')) {
      return $str;
    }

    // ENT_HTML5 is only referenced when the version check passes.
    if (null === $flags) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Repeat the decoding until a pass no longer changes the string,
    // so that nested (multiply encoded) entities are resolved as well.
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", array('\voku\helper\UTF8', 'entityCallback'), $str);

      // decode numeric & UTF16 two byte entities:
      // first append the semicolon that numeric entities may be missing ...
      $completed = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      // ... then let the native function do the actual decoding.
      $str = html_entity_decode($completed, $flags, $encoding);
    } while ($before !== $str);

    return $str;
  }
2826
2827
  /**
2828
   * Callback function for preg_replace_callback use.
2829
   *
2830
   * @param  array $matches PREG matches
2831
   *
2832
   * @return string
2833
   */
2834 12
  protected static function entityCallback($matches)
  {
    self::checkForSupport();

    // $matches[0] holds the complete numeric entity that was matched.
    $decoded = mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    // A decoded apostrophe is handed back as a hexadecimal entity instead of the raw character.
    return ($decoded === "'") ? '&#x27;' : $decoded;
  }
2846
2847
  /**
2848
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2849
   *
2850
   * @link http://php.net/manual/en/function.htmlentities.php
2851
   *
2852
   * @param string $str           <p>
2853
   *                              The input string.
2854
   *                              </p>
2855
   * @param int    $flags         [optional] <p>
2856
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2857
   *                              invalid code unit sequences and the used document type. The default is
2858
   *                              ENT_COMPAT | ENT_HTML401.
2859
   *                              <table>
2860
   *                              Available <i>flags</i> constants
2861
   *                              <tr valign="top">
2862
   *                              <td>Constant Name</td>
2863
   *                              <td>Description</td>
2864
   *                              </tr>
2865
   *                              <tr valign="top">
2866
   *                              <td><b>ENT_COMPAT</b></td>
2867
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2868
   *                              </tr>
2869
   *                              <tr valign="top">
2870
   *                              <td><b>ENT_QUOTES</b></td>
2871
   *                              <td>Will convert both double and single quotes.</td>
2872
   *                              </tr>
2873
   *                              <tr valign="top">
2874
   *                              <td><b>ENT_NOQUOTES</b></td>
2875
   *                              <td>Will leave both double and single quotes unconverted.</td>
2876
   *                              </tr>
2877
   *                              <tr valign="top">
2878
   *                              <td><b>ENT_IGNORE</b></td>
2879
   *                              <td>
2880
   *                              Silently discard invalid code unit sequences instead of returning
2881
   *                              an empty string. Using this flag is discouraged as it
2882
   *                              may have security implications.
2883
   *                              </td>
2884
   *                              </tr>
2885
   *                              <tr valign="top">
2886
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2887
   *                              <td>
2888
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2889
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2890
   *                              </td>
2891
   *                              </tr>
2892
   *                              <tr valign="top">
2893
   *                              <td><b>ENT_DISALLOWED</b></td>
2894
   *                              <td>
2895
   *                              Replace invalid code points for the given document type with a
2896
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2897
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2898
   *                              instance, to ensure the well-formedness of XML documents with
2899
   *                              embedded external content.
2900
   *                              </td>
2901
   *                              </tr>
2902
   *                              <tr valign="top">
2903
   *                              <td><b>ENT_HTML401</b></td>
2904
   *                              <td>
2905
   *                              Handle code as HTML 4.01.
2906
   *                              </td>
2907
   *                              </tr>
2908
   *                              <tr valign="top">
2909
   *                              <td><b>ENT_XML1</b></td>
2910
   *                              <td>
2911
   *                              Handle code as XML 1.
2912
   *                              </td>
2913
   *                              </tr>
2914
   *                              <tr valign="top">
2915
   *                              <td><b>ENT_XHTML</b></td>
2916
   *                              <td>
2917
   *                              Handle code as XHTML.
2918
   *                              </td>
2919
   *                              </tr>
2920
   *                              <tr valign="top">
2921
   *                              <td><b>ENT_HTML5</b></td>
2922
   *                              <td>
2923
   *                              Handle code as HTML 5.
2924
   *                              </td>
2925
   *                              </tr>
2926
   *                              </table>
2927
   *                              </p>
2928
   * @param string $encoding      [optional] <p>
2929
   *                              Like <b>htmlspecialchars</b>,
2930
   *                              <b>htmlentities</b> takes an optional third argument
2931
   *                              <i>encoding</i> which defines encoding used in
2932
   *                              conversion.
2933
   *                              Although this argument is technically optional, you are highly
2934
   *                              encouraged to specify the correct value for your code.
2935
   *                              </p>
2936
   * @param bool   $double_encode [optional] <p>
2937
   *                              When <i>double_encode</i> is turned off PHP will not
2938
   *                              encode existing html entities. The default is to convert everything.
2939
   *                              </p>
2940
   *
2941
   *
2942
   * @return string the encoded string.
2943
   * </p>
2944
   * <p>
2945
   * If the input <i>string</i> contains an invalid code unit
2946
   * sequence within the given <i>encoding</i> an empty string
2947
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2948
   * <b>ENT_SUBSTITUTE</b> flags are set.
2949
   */
2950 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Thin wrapper: every argument is passed through to the native function unchanged.
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
2954
2955
  /**
2956
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
2957
   *
2958
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2959
   *
2960
   * @param string $str           <p>
2961
   *                              The string being converted.
2962
   *                              </p>
2963
   * @param int    $flags         [optional] <p>
2964
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2965
   *                              invalid code unit sequences and the used document type. The default is
2966
   *                              ENT_COMPAT | ENT_HTML401.
2967
   *                              <table>
2968
   *                              Available <i>flags</i> constants
2969
   *                              <tr valign="top">
2970
   *                              <td>Constant Name</td>
2971
   *                              <td>Description</td>
2972
   *                              </tr>
2973
   *                              <tr valign="top">
2974
   *                              <td><b>ENT_COMPAT</b></td>
2975
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2976
   *                              </tr>
2977
   *                              <tr valign="top">
2978
   *                              <td><b>ENT_QUOTES</b></td>
2979
   *                              <td>Will convert both double and single quotes.</td>
2980
   *                              </tr>
2981
   *                              <tr valign="top">
2982
   *                              <td><b>ENT_NOQUOTES</b></td>
2983
   *                              <td>Will leave both double and single quotes unconverted.</td>
2984
   *                              </tr>
2985
   *                              <tr valign="top">
2986
   *                              <td><b>ENT_IGNORE</b></td>
2987
   *                              <td>
2988
   *                              Silently discard invalid code unit sequences instead of returning
2989
   *                              an empty string. Using this flag is discouraged as it
2990
   *                              may have security implications.
2991
   *                              </td>
2992
   *                              </tr>
2993
   *                              <tr valign="top">
2994
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2995
   *                              <td>
2996
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2997
   *                              U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise) instead of returning an empty string.
2998
   *                              </td>
2999
   *                              </tr>
3000
   *                              <tr valign="top">
3001
   *                              <td><b>ENT_DISALLOWED</b></td>
3002
   *                              <td>
3003
   *                              Replace invalid code points for the given document type with a
3004
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &amp;#xFFFD;
3005
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3006
   *                              instance, to ensure the well-formedness of XML documents with
3007
   *                              embedded external content.
3008
   *                              </td>
3009
   *                              </tr>
3010
   *                              <tr valign="top">
3011
   *                              <td><b>ENT_HTML401</b></td>
3012
   *                              <td>
3013
   *                              Handle code as HTML 4.01.
3014
   *                              </td>
3015
   *                              </tr>
3016
   *                              <tr valign="top">
3017
   *                              <td><b>ENT_XML1</b></td>
3018
   *                              <td>
3019
   *                              Handle code as XML 1.
3020
   *                              </td>
3021
   *                              </tr>
3022
   *                              <tr valign="top">
3023
   *                              <td><b>ENT_XHTML</b></td>
3024
   *                              <td>
3025
   *                              Handle code as XHTML.
3026
   *                              </td>
3027
   *                              </tr>
3028
   *                              <tr valign="top">
3029
   *                              <td><b>ENT_HTML5</b></td>
3030
   *                              <td>
3031
   *                              Handle code as HTML 5.
3032
   *                              </td>
3033
   *                              </tr>
3034
   *                              </table>
3035
   *                              </p>
3036
   * @param string $encoding      [optional] <p>
3037
   *                              Defines encoding used in conversion.
3038
   *                              </p>
3039
   *                              <p>
3040
   *                              For the purposes of this function, the encodings
3041
   *                              ISO-8859-1, ISO-8859-15,
3042
   *                              UTF-8, cp866,
3043
   *                              cp1251, cp1252, and
3044
   *                              KOI8-R are effectively equivalent, provided the
3045
   *                              <i>string</i> itself is valid for the encoding, as
3046
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3047
   *                              the same positions in all of these encodings.
3048
   *                              </p>
3049
   * @param bool   $double_encode [optional] <p>
3050
   *                              When <i>double_encode</i> is turned off PHP will not
3051
   *                              encode existing html entities, the default is to convert everything.
3052
   *                              </p>
3053
   *
3054
   * @return string The converted string.
3055
   * </p>
3056
   * <p>
3057
   * If the input <i>string</i> contains an invalid code unit
3058
   * sequence within the given <i>encoding</i> an empty string
3059
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3060
   * <b>ENT_SUBSTITUTE</b> flags are set.
3061
   */
3062 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Plain delegation to the native function; this wrapper only supplies 'UTF-8' as default encoding.
    $escaped = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $escaped;
  }
3066
3067
  /**
3068
   * checks whether iconv is available on the server
3069
   *
3070
   * @return   bool True if available, False otherwise
3071
   */
3072 1
  public static function iconv_loaded()
  {
    // extension_loaded() reports whether the named extension is loaded.
    $isLoaded = (bool)extension_loaded('iconv');

    return $isLoaded;
  }
3076
3077
  /**
3078
   * Converts Integer to hexadecimal U+xxxx code point representation.
3079
   *
3080
   * @param    int    $int The integer to be converted to hexadecimal code point.
3081
   * @param    string $pfix
3082
   *
3083
   * @return   string The code point, or empty string on failure.
3084
   */
3085
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain run of decimal digits (negative numbers, floats, text) is rejected.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad to at least four hex digits; longer values are left untouched.
    $digits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
3097
3098
  /**
3099
   * checks whether intl is available on the server
3100
   *
3101
   * @return   bool True if available, False otherwise
3102
   */
3103 1
  public static function intl_loaded()
  {
    // extension_loaded() reports whether the named extension is loaded.
    $isLoaded = (bool)extension_loaded('intl');

    return $isLoaded;
  }
3107
3108
  /**
3109
   * alias for "UTF8::is_ascii()"
3110
   *
3111
   * @param string $str
3112
   *
3113
   * @return boolean
3114
   */
3115 1
  public static function isAscii($str)
  {
    // camelCase alias: forwards unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
3119
3120
  /**
3121
   * alias for "UTF8::is_base64"
3122
   *
3123
   * @param string $str
3124
   *
3125
   * @return bool
3126
   */
3127 1
  public static function isBase64($str)
  {
    // camelCase alias: forwards unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
3131
3132
  /**
3133
   * alias for "UTF8::is_bom"
3134
   *
3135
   * @param string $utf8_chr
3136
   *
3137
   * @return boolean
3138
   */
3139
  public static function isBom($utf8_chr)
  {
    // camelCase alias: forwards unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
3143
3144
  /**
3145
   * Try to check if a string is a json-string...
3146
   *
3147
   * @param $str
3148
   *
3149
   * @return bool
3150
   *
3151
   * @deprecated
3152
   */
3153
  public static function isJson($str)
  {
    $str = (string)$str;

    // An empty string is never a JSON document.
    if ($str === '') {
      return false;
    }

    $decoded = json_decode($str);

    // json_decode() reports a syntax problem through json_last_error().
    if (json_last_error() !== JSON_ERROR_NONE) {
      return false;
    }

    // Structured documents only: objects and top-level arrays (previously arrays such as
    // "[1,2]" were wrongly rejected). Bare scalars ("1", "true", "null") are still not
    // treated as a JSON string, as before.
    return is_object($decoded) || is_array($decoded);
  }
3171
3172
  /**
3173
   * alias for "UTF8::is_utf8"
3174
   *
3175
   * @param string $str
3176
   *
3177
   * @return bool
3178
   */
3179 16
  public static function isUtf8($str)
  {
    // camelCase alias: forwards unchanged to is_utf8().
    $result = self::is_utf8($str);

    return $result;
  }
3183
3184
  /**
3185
   * Checks if a string is 7 bit ASCII.
3186
   *
3187
   * @param    string $str The string to check.
3188
   *
3189
   * @return   bool <strong>true</strong> if it is ASCII<br />
3190
   *                <strong>false</strong> otherwise
3191
   */
3192 4
  public static function is_ascii($str)
  {
    // Look for any byte with the high bit set (0x80-0xFF); 7-bit ASCII has none.
    $hasHighBitByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighBitByte;
  }
3196
3197
  /**
3198
   * Returns true if the string is base64 encoded, false otherwise.
3199
   *
3200
   * @param string $str
3201
   *
3202
   * @return bool Whether or not $str is base64 encoded
3203
   */
3204 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    // The empty string is not considered base64 encoded.
    if ($str === '') {
      return false;
    }

    // Strict decode, then re-encode: only canonical base64 survives the round trip unchanged.
    $decoded = base64_decode($str, true);

    return base64_encode($decoded) === $str;
  }
3218
3219
  /**
3220
   * Check if the input is binary... (this looks like a hack)
3221
   *
3222
   * @param string $input
3223
   *
3224
   * @return bool
3225
   */
3226 4
  public static function is_binary($input)
  {
    // Normalise first so that non-string input (e.g. false from a failed fread()) is handled uniformly.
    $input = (string)$input;

    // A non-empty run made up only of the characters "0" and "1" is treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary.
    //
    // The former third test, substr_count($input, '^ -~') / strlen($input) > 0.3, was removed:
    // substr_count() searched for the literal four-byte text "^ -~" (it is not a regex), so the
    // ratio could never exceed 0.25 and the test was always false. Results are unchanged.
    return substr_count($input, "\x00") > 0;
  }
3243
3244
  /**
3245
   * Check if the file is binary.
3246
   *
3247
   * @param string $file
3248
   *
3249
   * @return boolean
3250
   */
3251
  public static function is_binary_file($file)
  {
    // Only the first 512 bytes are sampled; an unreadable file is judged on an empty sample.
    $block = '';
    $fp = false;

    try {
      // 'rb': read the bytes untranslated on every platform.
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing, so test the handle
      // before reading from it.
      if ($fp !== false) {
        $data = fread($fp, 512);
        fclose($fp);
        $fp = false;

        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // Reached only when an error handler turns warnings into exceptions; release the
      // handle if it was opened and fall back to the empty sample.
      if (is_resource($fp)) {
        fclose($fp);
      }
      $block = '';
    }

    return self::is_binary($block);
  }
3263
3264
  /**
3265
   * Checks if the given string is exactly "UTF8 - Byte Order Mark".
3266
   *
3267
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3268
   *
3269
   * @param    string $utf8_chr The input string.
3270
   *
3271
   * @return   bool True if the $utf8_chr is Byte Order Mark, False otherwise.
3272
   */
3273 2
  public static function is_bom($utf8_chr)
  {
    // Strict comparison: the input must be exactly the value returned by bom(), nothing more.
    $bom = self::bom();

    return $utf8_chr === $bom;
  }
3277
3278
  /**
3279
   * Check if the string is UTF-16.
3280
   *
3281
   * @param string $str
3282
   *
3283
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3284
   */
3285 2 View Code Duplication
  public static function is_utf16($str)
  {
    // Only input that is_binary() flags is probed any further.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // One score per byte order, computed little-endian first, then big-endian.
    $scores = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $byteOrder) {
      $scores[$byteOrder] = 0;

      $asUtf8 = mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if ($asUtf8 === false || strlen($asUtf8) <= 1) {
        continue;
      }

      // Round trip UTF-8 -> UTF-16 -> UTF-8; it has to reproduce the first conversion.
      $backToUtf16 = mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = mb_convert_encoding($backToUtf16, 'UTF-8', $byteOrder);
      if ($roundTrip != $asUtf8) {
        continue;
      }

      // Score: keys of the round-tripped char table that are found (strictly) in the source table.
      $sourceChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $chr => $unusedCount) {
        if (in_array($chr, $sourceChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) gives no verdict.
    if ($scores['UTF-16LE'] == $scores['UTF-16BE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
3332
3333
  /**
3334
   * Check if the string is UTF-32.
3335
   *
3336
   * @param string $str
3337
   *
3338
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3339
   */
3340 2 View Code Duplication
  public static function is_utf32($str)
  {
    // Only input that is_binary() flags is probed any further.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // One score per byte order, computed little-endian first, then big-endian.
    $scores = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $byteOrder) {
      $scores[$byteOrder] = 0;

      $asUtf8 = mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if ($asUtf8 === false || strlen($asUtf8) <= 1) {
        continue;
      }

      // Round trip UTF-8 -> UTF-32 -> UTF-8; it has to reproduce the first conversion.
      $backToUtf32 = mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = mb_convert_encoding($backToUtf32, 'UTF-8', $byteOrder);
      if ($roundTrip != $asUtf8) {
        continue;
      }

      // Score: keys of the round-tripped char table that are found (strictly) in the source table.
      $sourceChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $chr => $unusedCount) {
        if (in_array($chr, $sourceChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) gives no verdict.
    if ($scores['UTF-32LE'] == $scores['UTF-32BE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
3387
3388
  /**
3389
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3390
   *
3391
   * @see    http://hsivonen.iki.fi/php-utf8/
3392
   *
3393
   * @param    string $str The string to be checked.
3394
   *
3395
   * @return   bool
3396
   */
3397 34
  public static function is_utf8($str)
  {
    $str = (string)$str;

    // The empty string is trivially valid.
    if (!isset($str[0])) {
      return true;
    }

    // Validation is done byte by byte and needs no PCRE UTF-8 support. The former "/u" regex
    // shortcut was removed: it ran only when self::pcre_utf8_support() did not return true,
    // which is the one situation in which the "/u" modifier cannot be relied on.

    $pending = 0;   // continuation octets still expected for the current sequence
    $codePoint = 0; // code point assembled so far
    $seqLength = 1; // total number of octets in the current sequence
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
      $octet = ord($str[$i]);

      if ($pending === 0) {
        // Start of a character: US-ASCII or the lead octet of a multi-octet sequence.
        if (0 === (0x80 & $octet)) {
          $seqLength = 1;
        } elseif (0xC0 === (0xE0 & $octet)) {
          // Lead octet of a 2-octet sequence.
          $codePoint = ($octet & 0x1F) << 6;
          $pending = 1;
          $seqLength = 2;
        } elseif (0xE0 === (0xF0 & $octet)) {
          // Lead octet of a 3-octet sequence.
          $codePoint = ($octet & 0x0F) << 12;
          $pending = 2;
          $seqLength = 3;
        } elseif (0xF0 === (0xF8 & $octet)) {
          // Lead octet of a 4-octet sequence.
          $codePoint = ($octet & 0x07) << 18;
          $pending = 3;
          $seqLength = 4;
        } else {
          // Stray continuation octet, 0xFE/0xFF, or the lead of a 5-/6-octet sequence.
          // 5-/6-octet forms are always illegal (non-shortest form or beyond U+10FFFF),
          // so they are rejected right away instead of being parsed to their end.
          return false;
        }

        continue;
      }

      // Inside a sequence only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $octet)) {
        return false;
      }

      $pending--;
      $codePoint |= ($octet & 0x3F) << ($pending * 6);

      if ($pending === 0) {
        // Sequence complete: reject illegal encodings and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            ((2 === $seqLength) && ($codePoint < 0x0080)) ||
            ((3 === $seqLength) && ($codePoint < 0x0800)) ||
            ((4 === $seqLength) && ($codePoint < 0x10000)) ||
            // From Unicode 3.2, surrogate code points are illegal.
            (($codePoint >= 0xD800) && ($codePoint <= 0xDFFF)) ||
            // Code points outside the Unicode range are illegal.
            ($codePoint > 0x10FFFF)
        ) {
          return false;
        }

        $codePoint = 0;
        $seqLength = 1;
      }
    }

    // A string that ends in the middle of a multi-octet sequence is not valid UTF-8
    // (previously such a truncated tail was accepted).
    return $pending === 0;
  }
3521
3522
  /**
3523
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3524
   * Decodes a JSON string
3525
   *
3526
   * @link http://php.net/manual/en/function.json-decode.php
3527
   *
3528
   * @param string $json    <p>
3529
   *                        The <i>json</i> string being decoded.
3530
   *                        </p>
3531
   *                        <p>
3532
   *                        This function only works with UTF-8 encoded strings.
3533
   *                        </p>
3534
   *                        <p>PHP implements a superset of
3535
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3536
   *                        only supports these values when they are nested inside an array or an object.
3537
   *                        </p>
3538
   * @param bool   $assoc   [optional] <p>
3539
   *                        When <b>TRUE</b>, returned objects will be converted into
3540
   *                        associative arrays.
3541
   *                        </p>
3542
   * @param int    $depth   [optional] <p>
3543
   *                        User specified recursion depth.
3544
   *                        </p>
3545
   * @param int    $options [optional] <p>
3546
   *                        Bitmask of JSON decode options. Currently only
3547
   *                        <b>JSON_BIGINT_AS_STRING</b>
3548
   *                        is supported (default is to cast large integers as floats)
3549
   *                        </p>
3550
   *
3551
   * @return mixed the value encoded in <i>json</i> in appropriate
3552
   * PHP type. Values true, false and
3553
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3554
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3555
   * <i>json</i> cannot be decoded or if the encoded
3556
   * data is deeper than the recursion limit.
3557
   */
3558 2
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through self::filter() before it is decoded.
    $filtered = self::filter($json);

    // $options is only handed to the native function when running on PHP 5.4 or newer.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3570
3571
  /**
3572
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3573
   * Returns the JSON representation of a value
3574
   *
3575
   * @link http://php.net/manual/en/function.json-encode.php
3576
   *
3577
   * @param mixed $value   <p>
3578
   *                       The <i>value</i> being encoded. Can be any type except
3579
   *                       a resource.
3580
   *                       </p>
3581
   *                       <p>
3582
   *                       All string data must be UTF-8 encoded.
3583
   *                       </p>
3584
   *                       <p>PHP implements a superset of
3585
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3586
   *                       only supports these values when they are nested inside an array or an object.
3587
   *                       </p>
3588
   * @param int   $options [optional] <p>
3589
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3590
   *                       <b>JSON_HEX_TAG</b>,
3591
   *                       <b>JSON_HEX_AMP</b>,
3592
   *                       <b>JSON_HEX_APOS</b>,
3593
   *                       <b>JSON_NUMERIC_CHECK</b>,
3594
   *                       <b>JSON_PRETTY_PRINT</b>,
3595
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3596
   *                       <b>JSON_FORCE_OBJECT</b>,
3597
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3598
   *                       constants is described on
3599
   *                       the JSON constants page.
3600
   *                       </p>
3601
   * @param int   $depth   [optional] <p>
3602
   *                       Set the maximum depth. Must be greater than zero.
3603
   *                       </p>
3604
   *
3605
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3606
   */
3607 1
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is passed through self::filter() before it is encoded.
    $filtered = self::filter($value);

    // $depth is only handed to the native function when running on PHP 5.5 or newer.
    return Bootup::is_php('5.5')
        ? json_encode($filtered, $options, $depth)
        : json_encode($filtered, $options);
  }
3619
3620
  /**
3621
   * Makes string's first char lowercase.
3622
   *
3623
   * @param    string $str The input string
3624
   *
3625
   * @return   string The resulting string
3626
   */
3627 6
  public static function lcfirst($str)
  {
    // Lower-case the first character only, then glue the untouched remainder back on.
    $head = self::strtolower(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
3631
3632
  /**
3633
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3634
   *
3635
   * WARNING: This is much slower than "ltrim()" !!!!
3636
   *
3637
   * @param    string $str   The string to be trimmed
3638
   * @param    string $chars Optional characters to be stripped
3639
   *
3640
   * @return   string The string with unwanted characters stripped from the left
3641
   */
3642 24 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // INF is the "no char list given" marker: fall back to the regex whitespace class.
    if (INF === $chars) {
      $class = '\s';
    } else {
      $class = self::rxClass($chars);
    }

    return preg_replace('/^' . $class . '+/u', '', $str);
  }
3654
3655
  /**
3656
   * Returns the UTF-8 character with the maximum code point in the given data.
3657
   *
3658
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3659
   *
3660
   * @return   string The character with the highest code point.
3661
   */
3662 1 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is first joined into a single string.
    $haystack = is_array($arg) ? implode($arg) : $arg;

    $codePoints = self::codepoints($haystack);

    // Pick the largest code point and turn it back into its character.
    return self::chr(max($codePoints));
  }
3670
3671
  /**
3672
   * Calculates and returns the maximum number of bytes taken by any
3673
   * UTF-8 encoded character in the given string.
3674
   *
3675
   * @param    string $str The original Unicode string.
3676
   *
3677
   * @return   int The largest byte length of any character in the string, 0 if there is none.
3678
   */
3679 1
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    // An empty size list yields 0; otherwise the largest entry wins.
    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3688
3689
  /**
3690
   * checks whether mbstring is available on the server
3691
   *
3692
   * @return   bool True if available, False otherwise
3693
   */
3694 2
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    // Side effect: when mbstring is present, its internal encoding is switched to UTF-8.
    mb_internal_encoding('UTF-8');

    return true;
  }
3704
3705
  /**
3706
   * Returns the UTF-8 character with the minimum code point in the given data.
3707
   *
3708
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3709
   *
3710
   * @return   string The character with the lowest code point.
3711
   */
3712 1 View Code Duplication
  public static function min($arg)
  {
    // An array of strings is first joined into a single string.
    $haystack = is_array($arg) ? implode($arg) : $arg;

    $codePoints = self::codepoints($haystack);

    // Pick the smallest code point and turn it back into its character.
    return self::chr(min($codePoints));
  }
3720
3721
  /**
3722
   * Normalize the encoding-name input.
3723
   *
3724
   * @param string $encodingLabel e.g.: ISO, UTF8, WINDOWS-1251 etc.
3725
   *
3726
   * @return string e.g.: ISO-8859-1, UTF-8, ISO-8859-5 etc.
3727
   */
3728 13
  public static function normalizeEncoding($encodingLabel)
  {
    // Lookup table: upper-cased label stripped of punctuation => canonical encoding name.
    // NOTE(review): 'WINDOWS1251' is mapped to 'ISO-8859-5'; both are Cyrillic code pages but
    // presumably not byte-compatible - confirm this mapping is intended.
    static $canonicalNames = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'WINDOWS1251' => 'ISO-8859-5',
    );

    // Build the lookup key: upper-case, then drop everything except letters, digits and whitespace.
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));

    // Unknown labels are handed back exactly as they came in.
    return isset($canonicalNames[$lookupKey]) ? $canonicalNames[$lookupKey] : $encodingLabel;
  }
3756
3757
  /**
   * Replace the characters listed in the class's MS Word table with their substitutes.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // Built once per request: search strings and replacements, index-aligned.
    static $lookup = null;

    if ($lookup === null) {
      $lookup = array(
          'from' => array_keys(self::$utf8MSWord),
          'to'   => array_values(self::$utf8MSWord),
      );
    }

    return str_replace($lookup['from'], $lookup['to'], $str);
  }
3776
3777
  /**
   * Normalize the whitespace.
   *
   * Every entry of the class's whitespace table is replaced by a plain space;
   * unless asked otherwise, the entries of the bidi-control table are removed first.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // The cache slot must be derived from the same strict test that decides
    // whether NO-BREAK SPACE is dropped. Casting the flag to int instead let a
    // truthy non-true value (e.g. 1) store the *full* table in slot 1, which a
    // later call with true would then wrongly reuse.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($whitespaces[$cacheKey])) {

      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3815
3816
  /**
   * Format a number with grouped thousands.
   *
   * When the version check passes and one of the separators is longer than one
   * byte, the number is formatted with the plain '.' / ',' separators first and
   * the separators are swapped in afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    if (
        Bootup::is_php('5.4') === true
        &&
        (isset($thousands_sep[1]) || isset($dec_point[1]))
    ) {
      // strtr() substitutes both separators in one pass. The former
      // str_replace() call worked sequentially, so a decimal point containing
      // ',' was rewritten a second time by the thousands-separator step
      // (e.g. "1,234.56" with ',' / '&nbsp;' became "1&nbsp;234&nbsp;56").
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3846
3847
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * Only the first (up to four) bytes are looked at. The sequence is not
   * validated: when fewer bytes are present than the lead byte announces, a
   * shorter decoding is attempted or the value of the first byte is returned.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for an empty input.
   */
  public static function ord($s)
  {
    $s = (string)$s;

    // Check the length instead of truthiness: the string "0" is falsy in PHP
    // and would otherwise be reported as code point 0 instead of 48.
    if (!isset($s[0])) {
      return 0;
    }

    // 1-based list of the first four byte values.
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // four-byte sequence
    if (0xF0 <= $lead && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three-byte sequence
    if (0xE0 <= $lead && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two-byte sequence
    if (0xC0 <= $lead && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    return $lead;
  }
3878
3879
  /**
   * Parses the string into variables.
   *
   * WARNING: This differs from parse_str() by returning the results
   *    instead of placing them in the local scope!
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string; it is passed through filter() first.
   *                        </p>
   * @param array  $result  <p>
   *                        Receives the parsed variables as array elements.
   *                        </p>
   *
   * @return bool The value returned by mb_parse_str() (true on success, false on failure).
   *              Earlier revisions returned nothing, so callers that ignore the
   *              return value are unaffected.
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    $str = self::filter($str);

    return mb_parse_str($str, $result);
  }
3906
3907
  /**
   * Checks whether the "u" pattern modifier, which enables UTF-8 support in PCRE, is usable.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // An empty pattern with the "u" modifier is run against an empty subject;
    // errors are silenced so that a failure simply results in false.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3917
3918
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: an all-digit value is taken as a
   * decimal number, a value made of hex digits goes through hex_to_int(), and
   * anything else is handed to ord().
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array An empty array when a boundary is falsy or resolves to 0.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // Resolve start and end with the same rules, start first.
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    // Native range() yields the code points; chr() turns each into a character.
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3964
3965
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * The checks run in a fixed order (UTF-8, UTF-32 BE, UTF-32 LE, UTF-16 BE,
   * UTF-16 LE) and each one that matches the current start of the string
   * removes its marker. UTF-32 is tested before UTF-16 because the UTF-32 LE
   * marker begins with the UTF-16 LE marker.
   *
   * Besides the raw byte markers, the UTF-8 encoded "mojibake" forms are
   * recognised. They are stripped with their own byte length; stripping only
   * as many bytes as the raw marker has would leave part of them behind.
   *
   * @param string $str
   * @param bool   $utf8
   * @param bool   $utf16
   * @param bool   $utf32
   *
   * @return string
   */
  public static function removeBOM($str = '', $utf8 = true, $utf16 = true, $utf32 = true)
  {
    // Each step is a list of alternative prefixes; at most one is removed per step.
    $steps = array();

    if ($utf8 === true) {
      $steps[] = array(
          "\xEF\xBB\xBF",             // UTF-8 BOM
          "\xC3\xAF\xC2\xBB\xC2\xBF", // the same three bytes read as Latin-1 and re-encoded as UTF-8
      );
    }

    if ($utf32 === true) {
      $steps[] = array("\x00\x00\xFE\xFF"); // UTF-32 (BE)
      $steps[] = array("\xFF\xFE\x00\x00"); // UTF-32 (LE)
    }

    if ($utf16 === true) {
      $steps[] = array(
          "\xFE\xFF",         // UTF-16 (BE)
          "\xC3\xBE\xC3\xBF", // "þÿ" stored as UTF-8
      );
      $steps[] = array(
          "\xFF\xFE",         // UTF-16 (LE)
          "\xC3\xBF\xC3\xBE", // "ÿþ" stored as UTF-8
      );
    }

    foreach ($steps as $alternatives) {
      foreach ($alternatives as $bom) {
        $bomLength = strlen($bom);

        if (strncmp($str, $bom, $bomLength) === 0) {
          // The cast keeps the result a string where substr() returns false
          // for an input that consists of nothing but the marker.
          $str = (string)substr($str, $bomLength);
          break;
        }
      }
    }

    return $str;
  }
4015
4016
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of two or more directly consecutive copies of a search string
   * is collapsed into a single copy.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        $item = (string)$item;

        // Nothing to collapse for an empty search string.
        if ($item === '') {
          continue;
        }

        // The replacement is the captured group rather than $item itself:
        // used as a replacement string, "$0", "\1" or backslashes inside
        // $item would be interpreted as back-references.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4038
4039
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * Adapted from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the URL-encoded (%xx) forms of the control characters.
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Hex digits in percent-encoding may be upper or lower case, hence the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127, the counterpart of \x7F below
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until a pass removes nothing: deleting one sequence can bring the
    // halves of another one together (e.g. "%%0000").
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4072
4073
  /**
   * Replace every replacement character (the "diamond question mark", �) in a string.
   *
   * @param string $str
   * @param string $unknown The text each occurrence is replaced with.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // Both spellings are searched for: the escaped byte sequence and the literal character.
    $diamonds = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // A string replacement is applied to every search entry.
    return str_replace($diamonds, $unknown, $str);
  }
4095
4096
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * WARNING: This is much slower then "rtrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted, the
   *                         regex class "\s" is used.
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $pattern = INF === $chars ? '\s' : self::rxClass($chars);

    // "D" (dollar-end-only) anchors "$" at the very end of the subject.
    // Without it "$" also matches in front of a final newline, so characters
    // standing before that newline were stripped although they are not at
    // the end of the string (e.g. rtrim("abc\n", "c")).
    return preg_replace('/' . $pattern . '+$/uD', '', $str);
  }
4118
4119
  /**
   * Build a regex fragment that matches any one of the characters in a string.
   *
   * The result is a bracket expression; pieces that cannot be placed inside
   * the brackets are appended as alternatives of a non-capturing group.
   * Results are cached per argument pair.
   *
   * @param string $s     The characters to match.
   * @param string $class Text placed at the start of the bracket expression as-is.
   *
   * @return string
   */
  protected static function rxClass($s, $class = '')
  {
    // Two-level cache [$class][$s]: a single concatenated key would be the
    // same for different argument pairs, e.g. ('ab', 'c') and ('a', 'bc').
    static $rxClassCache = array();

    if (isset($rxClassCache[$class][$s])) {
      return $rxClassCache[$class][$s];
    }

    // $parts[0] collects the bracket content, further entries are alternatives.
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // A hyphen goes to the front, where it cannot form a range.
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // At most two bytes long: escape for use inside the pattern.
        $parts[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // A single character of three or more bytes is added unescaped.
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    $parts[0] = '[' . $parts[0] . ']';

    if (1 === count($parts)) {
      $return = $parts[0];
    } else {
      $return = '(?:' . implode('|', $parts) . ')';
    }

    $rxClassCache[$class][$s] = $return;

    return $return;
  }
4163
4164
  /**
   * Echo native UTF8-Support libs, e.g. for debugging.
   *
   * Each entry of the support list is printed followed by "\n<br>".
   */
  public static function showSupport()
  {
    // Run the support check first, as the other methods of this class do
    // before they read self::$support.
    // NOTE(review): presumably this is what fills the list - confirm in checkForSupport().
    self::checkForSupport();

    foreach (self::$support as $utf8Support) {
      echo $utf8Support . "\n<br>";
    }
  }
4173
4174
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   *
   * @return   string The HTML numbered entity, or an empty string for an empty input.
   */
  public static function single_chr_html_encode($chr)
  {
    $chr = (string)$chr;

    // Check the length instead of truthiness, so that "0" is encoded as well.
    if (!isset($chr[0])) {
      return '';
    }

    // For a single byte the code point is the byte value itself, which the
    // native ord() function returns.
    if (!isset($chr[1])) {
      return '&#' . ord($chr) . ';';
    }

    return '&#' . self::ord($chr) . ';';
  }
4189
4190
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the characters are collected by a regex; otherwise
   * the bytes are walked manually and grouped by their UTF-8 lead byte.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied on the PCRE path).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // "." with the "s" modifier matches every character, newlines included.
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback

      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // single byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // two-byte sequence, taken only with a valid continuation byte
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // three-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // four-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    // Group the single characters into chunks of $length characters.
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4268
4269
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection steps, in order: UTF-16 / UTF-32 for binary strings, a plain
   * ASCII check, "mb_detect_encoding()" with a fixed candidate list, and
   * finally an iconv() round trip over the class's iconv encoding list.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // Each detector is run once and its result reused for both byte orders.
      $utf16 = self::is_utf16($str);
      if ($utf16 == 1) {
        return 'UTF-16LE';
      }
      if ($utf16 == 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 == 1) {
        return 'UTF-32LE';
      }
      if ($utf32 == 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = array(
        'UTF-8',
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
    );
    self::checkForSupport();
    $encoding = mb_detect_encoding($str, $detectOrder, true);

    // A reported UTF-8 is only accepted when is_utf8() agrees.
    if ($encoding && ($encoding != 'UTF-8' || self::is_utf8($str) === true)) {
      return $encoding;
    }

    //
    // 4.) check via "iconv()"
    //

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      // A candidate is accepted when converting the string from it to itself leaves the string unchanged.
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@iconv($encodingTmp, $encodingTmp, $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4342
4343
  /**
   * Case-insensitive, UTF-8 aware variant of "str_replace()".
   *
   * Every search string becomes a quoted, case-insensitive UTF-8 pattern.
   * Replacement strings are inserted literally, as with the native
   * "str_ireplace()".
   *
   * @param string|array $search  The value(s) being searched for; an empty search string never matches.
   * @param string|array $replace The replacement value(s).
   * @param string|array $subject The string or array being searched and replaced on.
   * @param null|int     $count   If passed, this will hold the number of replacements performed.
   *
   * @return string|array
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();

    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // A pattern that cannot match: the start of the subject preceded by a character.
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // Neutralise back-reference syntax ("$0", "\1", ...) in the replacement
    // text; preg_replace() would otherwise expand it instead of inserting it.
    $escapeMap = array(
        '\\' => '\\\\',
        '$'  => '\\$',
    );

    if (is_array($replace)) {
      $replacements = array();
      foreach ($replace as $replacement) {
        $replacements[] = strtr((string)$replacement, $escapeMap);
      }
    } else {
      $replacements = strtr((string)$replace, $escapeMap);
    }

    $subject = preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
4371
4372
  /**
   * Shorten a string to a character limit, cutting at a word boundary where possible.
   *
   * Strings that already fit are returned untouched; shortened strings get
   * $strAddOn appended.
   *
   * @param  string $str
   * @param  int    $length
   * @param  string $strAddOn
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // The limit falls directly on a space: cut in front of it.
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // Otherwise cut at the limit and drop the last, possibly broken, word.
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $wholeWords = implode(' ', $words);

    if ($wholeWords == '') {
      // No space inside the limit, so there is nothing but a hard cut.
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $wholeWords . $strAddOn;
  }
4410
4411
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when $pad_length is not an integer, is
   * not positive, does not exceed the input length, or when the pad string
   * is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $input_length) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);
    $diff = $pad_length - $input_length;

    // Nothing to add, or nothing to add it with; the latter would otherwise
    // end in a division by zero below.
    if ($diff < 1 || $ps_length < 1) {
      return $input;
    }

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // The left side gets the smaller half. Divide first, then round:
        // "(int)$diff / 2" casts before dividing and produces a float length.
        $left = (int)floor($diff / 2);
        $right = $diff - $left;
        $repeats = (int)ceil($diff / $ps_length / 2);

        $pre = self::substr(str_repeat($pad_string, $repeats), 0, $left);
        $post = self::substr(str_repeat($pad_string, $repeats), 0, $right);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $input . $post;
  }
4456
4457
  /**
   * Repeat a string.
   *
   * @param string $input      <p>
   *                           The string to be repeated; it is passed through
   *                           filter() first.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    // The native function does the repetition on the filtered input.
    return str_repeat(self::filter($input), $multiplier);
  }
4481
4482
  /**
   * INFO: this is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack. If subject is an
   *                       array, the search and replace is performed with
   *                       every entry and the return value is an array as well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // Delegates to the native function; $count is filled through the reference.
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4516
4517
  /**
   * Shuffle the characters of a string into a random order.
   *
   * The string is broken into pieces with self::split(), the pieces are
   * shuffled with the native shuffle(), and then joined back together.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
4532
4533
  /**
   * Sort all characters of a string by their code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique If true, repeated characters are collapsed to a single occurrence.
   * @param    bool   $desc   If true, characters are sorted in descending code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicate values (array keys are unique)
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    // turn the ordered code points back into a string
    return self::string($codePoints);
  }
4558
4559
  /**
   * Convert a string to an array of grapheme clusters (or chunks of them).
   *
   * With the intl extension flag set, clusters are read with grapheme_extract();
   * otherwise they are matched with the Grapheme::GRAPHEME_CLUSTER_RX pattern.
   *
   * @param string $str The input string.
   * @param int    $len Number of clusters per array element. Values below 1 are
   *                    handed, unchanged, to the native str_split() so that it
   *                    decides how to report the invalid length.
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();

    // Remember the caller's raw value: since PHP 7, func_get_arg() returns the
    // *current* value of a parameter, so it cannot be used after the cast below.
    $rawLen = $len;
    $len = (int)$len;

    if ($len < 1) {
      return str_split($str, $rawLen);
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $pos = 0;
      $byteLength = strlen($str);
      while ($pos < $byteLength) {
        // the last argument receives the byte offset of the next cluster
        $graphemes[] = grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $pos, $pos);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $graphemes = $matches[0];
    }

    if ($len === 1) {
      return $graphemes;
    }

    // group consecutive clusters into chunks of $len
    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $index => $grapheme) {
      if ($index % $len === 0) {
        // first cluster of a new chunk
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
4608
4609
  /**
   * Get a binary ("0"/"1") representation of the given input.
   *
   * The input is walked byte by byte; for each byte the value returned by
   * self::ord() is formatted with "%08b" and the pieces are concatenated.
   *
   * @param   string $str The input character.
   *
   * @return  string Empty string for empty input.
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $pieces = array();
    $byteCount = strlen($str);

    for ($index = 0; $index < $byteCount; ++$index) {
      $pieces[] = vsprintf('%08b', (array)self::ord($str[$index]));
    }

    return implode('', $pieces);
  }
4635
4636
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   *
   * The input is cleaned with self::clean(), split into an array of characters,
   * and every non-ASCII character is replaced in that array by a value looked up
   * in per-"bank" tables (code point >> 8) that are lazily included from
   * __DIR__ . '/data/xNN.php' and cached in a static variable. Because a full
   * character array is built, memory use grows with the length of the input.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @author <[email protected]>
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    // cache of loaded lookup banks, shared across calls
    // NOTE: the names $UTF8_TO_ASCII and $bank must not be renamed - the included
    // data files run in this scope (presumably they assign $UTF8_TO_ASCII[$bank]).
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    // if matching failed there is nothing to iterate over
    $chars = isset($ar[0]) ? $ar[0] : array();

    foreach ($chars as &$c) {

      // reset for every character, otherwise the "unknown" guard below could be
      // bypassed by the code point decoded for a previous character
      $ord = null;

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      $ordC1 = ord($c[1]);

      // two-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = ord($c[2]);

        // three-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = ord($c[3]);

          // four-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = ord($c[4]);

            // five-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = ord($c[5]);

              // six-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // lead bytes 254 and 255 never decode to a code point
      if ($ordC0 >= 254 && $ordC0 <= 255) {
        $c = $unknown;
        continue;
      }

      // no branch above produced a code point for this character
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the high bits select the lookup table ("bank"), the low 8 bits the entry
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          /** @noinspection PhpIncludeInspection */
          include $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // guard against a data file that did not populate its bank
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = array();
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
4746
4747
  /**
   * Counts (or lists) the words in a UTF-8 string.
   *
   * The string is split on runs matched by the character class built by
   * self::rxClass($charlist, '\pL'), optionally joined by a dash punctuation
   * character or an apostrophe.
   *
   * @param string $s        The input string.
   * @param int    $format   0 (default): return the number of words;
   *                         1: return a list of the words;
   *                         2: return the words keyed by the offset computed with self::strlen().
   * @param string $charlist Additional characters, passed to self::rxClass().
   *
   * @return array|float|string The number of words in the string, or an array of words
   */
  public static function str_word_count($s, $format = 0, $charlist = '')
  {
    $wordClass = self::rxClass($charlist, '\pL');
    // odd indexes of the result hold the words, even indexes the text in between
    $parts = preg_split("/({$wordClass}+(?:[\p{Pd}’']{$wordClass}+)*)/u", $s, -1, PREG_SPLIT_DELIM_CAPTURE);
    $partCount = count($parts);

    if (1 == $format) {
      $words = array();
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if (2 == $format) {
      self::checkForSupport();

      $words = array();
      // the first word starts after the leading non-word text
      $position = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$position] = $parts[$i];
        $position += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($partCount - 1) / 2;
  }
4781
4782
  /**
   * Case-insensitive string comparison.
   *
   * Both operands are case-folded with self::strtocasefold() and then compared
   * with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4794
4795
  /**
   * String comparison.
   *
   * Identical strings short-circuit to 0; otherwise both operands are
   * normalized to NFD and compared with the native strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    // cheap path: the string forms are byte-identical
    if (($str1 . '') === ($str2 . '')) {
      return 0;
    }

    $normalized1 = Normalizer::normalize($str1, Normalizer::NFD);
    $normalized2 = Normalizer::normalize($str2, Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4812
4813
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      The string to examine.
   * @param string $charlist The characters to stop at; an empty list yields null.
   * @param int    $start    Start position, applied via self::substr().
   * @param int    $len      Length of the examined segment, applied via self::substr().
   *
   * @return int|null
   */
  public static function strcspn($str, $charlist, $start = 0, $len = 2147483647)
  {
    $charlist = $charlist . '';
    if ($charlist === '') {
      return null;
    }

    // only cut the string when the caller asked for a sub-range
    $needsSlice = $start || 2147483647 != $len;
    $str = $needsSlice ? (string)self::substr($str, $start, $len) : (string)$str;

    // capture (lazily) everything in front of the first character from the list
    if (preg_match('/^(.*?)' . self::rxClass($charlist) . '/us', $str, $match)) {
      return self::strlen($match[1]);
    }

    return self::strlen($str);
  }
4842
4843
  /**
   * Makes a UTF-8 string from code points.
   *
   * Every element is mapped through the "chr" method of \voku\helper\UTF8 and
   * the results are concatenated.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    $characters = array_map($toChar, $array);

    return implode($characters);
  }
4862
4863
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * The first three bytes of the string are handed to self::is_bom().
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $leadingBytes = substr($str, 0, 3);

    return self::is_bom($leadingBytes);
  }
4874
4875
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The string is passed through self::clean() before the native strip_tags()
   * is applied.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags which should not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 first
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
4901
4902
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack to start searching;
   *                           null (the default) is treated as 0.
   *                           </p>
   * @param string  $encoding  Character encoding passed to mb_stripos();
   *                           a boolean is replaced by 'UTF-8' (fallback for old versions).
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string, or false if needle is not found or if
   *                   haystack or needle is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // mb_stripos() expects an integer offset; passing the null default through
    // is deprecated on newer PHP versions, so normalize it like strpos()/strrpos() do
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    return mb_stripos($haystack, $needle, $offset, $encoding);
  }
4949
4950
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case-insensitive, delegated to mb_stristr() with the 'UTF-8' encoding).
   *
   * @param string $str           The haystack.
   * @param string $needle        The string to look for; an empty needle yields false.
   * @param bool   $before_needle Passed through to mb_stristr().
   *
   * @return false|string
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = $needle . '';

    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    return mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
4970
4971
  /**
   * Get the string length in characters, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Charset handed to mb_strlen(); a boolean is replaced
   *                           by 'UTF-8' (fallback for old versions).
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *                           (only applied when the encoding is 'UTF-8').
   *
   * @return int the number of characters in
   *           string str having character encoding
   *           encoding. A multi-byte character is
   *           counted as 1.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return mb_strlen($str, $encoding);
  }
5007
5008
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both operands are case-folded with self::strtocasefold() and then compared
   * with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Similar to other string comparison functions, this one returns < 0 if str1 is less than str2; > 0 if
   *             str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5021
5022
  /**
   * String comparisons using a "natural order" algorithm.
   *
   * Identical strings short-circuit to 0; otherwise both operands are passed
   * through self::strtonatfold() and compared with the native strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Similar to other string comparison functions, this one returns < 0 if str1 is less than str2; > 0 if
   *             str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    if (($str1 . '') === ($str2 . '')) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5035
5036
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both operands are case-folded with self::strtocasefold() and then compared
   * with self::strncmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len Number of characters to compare.
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5049
5050
  /**
   * Comparison of the first n characters.
   *
   * Both operands are truncated with self::substr() and compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len Number of characters to compare.
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5065
5066
  /**
   * Search a string for any of a set of characters.
   *
   * @param string $s        The string to search in.
   * @param string $charList The characters to look for, passed to self::rxClass().
   *
   * @return string|false The rest of the string starting at the first match, or false if nothing matched.
   */
  public static function strpbrk($s, $charList)
  {
    if (!preg_match('/' . self::rxClass($charList) . '/us', $s, $m)) {
      return false;
    }

    // locate the matched character by bytes and return everything from there on
    $bytePosition = strpos($s, $m[0]);

    return substr($s, $bytePosition);
  }
5082
5083
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack     <p>
   *                              The string being checked.
   *                              </p>
   * @param string  $needle       <p>
   *                              The string to find in haystack. It is always treated as
   *                              a string, so an integer is searched for as its decimal text.
   *                              </p>
   * @param int     $offset       [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string  $encoding     Encoding for mb_strpos(); a boolean is replaced by 'UTF-8'
   *                              (fallback for old versions). Only used on the mbstring path.
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    // NOTE: a former "integer needle -> self::chr()" branch was removed here; it
    // could never run because $needle has already been cast to a string above.

    if ($cleanUtf8 === true) {
      // mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // grapheme_strpos() belongs to the intl extension, so it is guarded by the
    // intl flag (the iconv flag says nothing about its availability)
    if (self::$support['intl'] === true) {
      return grapheme_strpos($haystack, $needle, $offset);
    }

    // fallback: byte-wise search, converted back to a character position

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // the haystack was shortened by a positive offset, so add it back;
      // negative offsets are ignored on this path
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5157
5158
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around mb_strrchr().
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to 'UTF-8'.
   *                         </p>
   *
   * @return string the portion of haystack.
   * or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5192
5193
  /**
   * Reverses characters order in the string.
   *
   * The string is broken into pieces with self::split(); the pieces are
   * reversed and joined back together.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $characters = self::split($str);
    $reversed = array_reverse($characters);

    return implode($reversed);
  }
5204
5205
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around mb_strrichr().
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to 'UTF-8'.
   *                         </p>
   *
   * @return string the portion of haystack.
   * or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5239
5240
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Both haystack and needle are lower-cased with self::strtolower() and the
   * search itself is delegated to self::strrpos().
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int The position of offset
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
5253
5254
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string  $haystack     <p>
   *                              The string being checked, for the last occurrence
   *                              of needle
   *                              </p>
   * @param string  $needle       <p>
   *                              The string to find in haystack. It is always treated as
   *                              a string, so an integer is searched for as its decimal text.
   *                              </p>
   * @param int     $offset       [optional] May be specified to begin searching an arbitrary number of characters into
   *                              the string. Negative values will stop searching at an arbitrary point
   *                              prior to the end of the string. null (the default) is treated as 0.
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found (or haystack / needle
   *                   is empty), it returns false.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // NOTE: a former "integer needle -> self::chr()" branch was removed here; it
    // could never run because $needle has already been cast to a string above.

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // grapheme_strrpos() belongs to the intl extension, so it is guarded by the
    // intl flag (the iconv flag says nothing about its availability)
    if (self::$support['intl'] === true) {
      return grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: byte-wise search, converted back to a character position

    if ($offset > 0) {
      // skip the first $offset characters
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // drop the last |$offset| characters
      $haystack = self::substr($haystack, 0, $offset);
    }

    if (($pos = strrpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // a positive offset shortened the haystack from the front, so add it back
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5327
5328
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $s     The string to examine.
   * @param string $mask  The allowed characters, passed to self::rxClass().
   * @param int    $start Start position, applied via self::substr().
   * @param int    $len   Length of the examined segment, applied via self::substr().
   *
   * @return int|null
   */
  public static function strspn($s, $mask, $start = 0, $len = 2147483647)
  {
    // only cut the string when the caller asked for a sub-range
    if ($start || 2147483647 != $len) {
      $s = self::substr($s, $start, $len);
    }

    if (preg_match('/^' . self::rxClass($mask) . '+/u', $s, $matches)) {
      return self::strlen($matches[0]);
    }

    return 0;
  }
5347
5348
  /**
   * Returns the part of the haystack starting at the first occurrence of the needle.
   *
   * Thin wrapper around grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      The input string. Must be valid UTF-8.
   * @param string $needle        The string to look for. Must be valid UTF-8.
   * @param bool   $before_needle [optional] If TRUE, the part of the haystack before the first
   *                              occurrence of the needle (excluding the needle) is returned instead.
   *
   * @return string|false The portion of the string, or FALSE if the needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5372
5373
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is run through the common case-folding table, optionally through the
   * full case-folding data set, then cleaned and lowercased.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str  The string to fold.
   * @param bool   $full Whether the "caseFolding_full" data set is applied as well.
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup tables are built once per request and then reused
    static $fullFoldMap = null;
    static $commonSearch = null;
    static $commonReplace = null;

    if (null === $commonSearch) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonSearch, $commonReplace, $str);

    if ($full) {
      if (null === $fullFoldMap) {
        $fullFoldMap = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullFoldMap[0], $fullFoldMap[1], $folded);
    }

    return self::strtolower(self::clean($folded));
  }
5410
5411
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      The string being lowercased.
   * @param string $encoding Character encoding handed to mb_strtolower().
   *
   * @return string The input with all alphabetic characters converted to lowercase;
   *                an empty string for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // init
    self::checkForSupport();

    return mb_strtolower($input, $encoding);
  }
5437
5438
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and every run of non-spacing marks (\p{Mn}) is removed.
   *
   * @param string $s
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = Normalizer::normalize($s, Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5449
5450
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when mbstring is available; otherwise the string is cleaned and
   * translated through the table returned by self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      The string being uppercased.
   * @param string $encoding Character encoding handed to mb_strtoupper().
   *
   * @return string The input with all alphabetic characters converted to uppercase;
   *                an empty string for empty input.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // fallback lookup lists, built lazily on first use
    static $tableSearch = null;
    static $tableReplace = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return mb_strtoupper($input, $encoding);
    }

    // fallback

    if (null === $tableSearch) {
      $table = self::case_table();
      $tableSearch = array_keys($table);
      $tableReplace = array_values($table);
    }

    return str_replace($tableSearch, $tableReplace, self::clean($input));
  }
5493
5494
  /**
   * Translate characters or replace sub-strings.
   *
   * With three arguments, $from and $to are split into characters and paired up one by one
   * (the longer list is truncated to the length of the shorter one). With two arguments,
   * $from is handed to PHP's strtr() unchanged and therefore has to be an array of
   * "search => replace" pairs.
   *
   * @param string       $s    The string being translated.
   * @param string|array $from Characters to replace, or a replace-pairs array when $to is omitted.
   * @param string       $to   Replacement characters; INF marks "not given".
   *
   * @return string
   */
  public static function strtr($s, $from, $to = INF)
  {
    // two-argument form: $from already holds the replace pairs
    if (INF === $to) {
      return strtr($s, $from);
    }

    $search = self::str_split($from);
    $replace = self::str_split($to);

    // both lists must have the same size for array_combine()
    $pairCount = min(count($search), count($replace));
    $search = array_slice($search, 0, $pairCount);
    $replace = array_slice($replace, 0, $pairCount);

    return strtr($s, array_combine($search, $replace));
  }
5522
5523
  /**
   * Return the width of a string, as computed by mb_strwidth() for UTF-8.
   *
   * @param string $s
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = mb_strwidth($s, 'UTF-8');

    return $width;
  }
5537
5538
  /**
   * Get part of a string.
   *
   * Backends are tried in this order: mb_substr(), grapheme_substr(), and finally a pure PHP
   * fallback that splits the string into characters.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       The string being checked.
   * @param int     $start     The first position used in str.
   * @param int     $length    [optional] The maximum length of the returned string;
   *                           null means "up to the end of the string".
   * @param string  $encoding  Character encoding handed to mb_substr().
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return string The portion of str specified by the start and length parameters;
   *                an empty string for empty input.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    if ($length === null) {
      $length = (int)self::strlen($str);
    } else {
      $length = (int)$length;
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return mb_substr($str, $start, $length, $encoding);
    }

    // grapheme_substr() belongs to the intl extension; testing the iconv flag here (as the
    // code used to do) ends in a fatal "undefined function" error when iconv is loaded but
    // intl is not, so the guard checks the function that is actually called.
    // NOTE(review): grapheme_substr() counts grapheme clusters, mb_substr() counts code
    // points - results can differ for strings with combining marks.
    if (function_exists('grapheme_substr')) {
      return (string)grapheme_substr($str, $start, $length);
    }

    // fallback

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($array, $start, $length));
  }
5604
5605
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The main string is cut first; the secondary string is then truncated to the length of
   * that slice before both are compared.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts
   *                                    counting from the end of the string.
   * @param int     $length             The length of the comparison (2147483647 means "no limit").
   * @param boolean $case_insensitivity If TRUE, the comparison is case insensitive.
   *
   * @return int
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainSlice = self::substr($main_str, $offset, $length);
    $otherSlice = self::substr($str, 0, self::strlen($mainSlice));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainSlice, $otherSlice);
    }

    return self::strcmp($mainSlice, $otherSlice);
  }
5625
5626
  /**
   * Count the number of (non-overlapping) sub-string occurrences.
   *
   * $offset and $length are measured in characters: the haystack is first cut with
   * self::substr() and the remaining part is then searched as a whole.
   *
   * @param    string   $haystack The string to search in.
   * @param    string   $needle   The string to search for. PHP's substr_count() rejects an empty needle.
   * @param    int      $offset   The offset where to start counting.
   * @param    int|null $length   The maximum length after the specified offset to search for the
   *                              substring; null means "up to the end of the haystack".
   *
   * @return   int number of occurrences of $needle
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $offset = (int)$offset;

    // A strict null check keeps an explicit length of 0 meaningful and stops an omitted
    // length from being cast to 0 (which would empty the haystack).
    if ($offset !== 0 || $length !== null) {
      $haystack = self::substr($haystack, $offset, $length === null ? null : (int)$length);
    }

    // The window was already applied above (in characters). Passing offset/length to the
    // native function again would apply them a second time - and in bytes. A plain byte
    // search is safe, because a valid UTF-8 needle can only match on character boundaries.
    return substr_count((string)$haystack, (string)$needle);
  }
5652
5653
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * When $str is an array, every element is processed on its own; $replacement, $start and
   * $length may then be arrays as well (one entry per element) or scalars applied to all.
   *
   * @param string|array   $str         The input string, or an array of input strings.
   * @param string|array   $replacement The replacement; for a string input only the first
   *                                    array element is used.
   * @param int|array      $start       Character position where the replacement starts.
   * @param null|int|array $length      Number of characters to replace; for a string input
   *                                    null means "up to the end of the string".
   *
   * @return array|string
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          // anything that is not a real integer falls back to position 0
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        // break the reference so that the last element cannot be overwritten by accident
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both strings into UTF-8 characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // "replace up to the end": the character count is already known from the split above,
      // so neither mbstring nor its internal encoding setting is involved
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
5731
5732
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is uppercased; when that changes nothing (the character
   * already equals its uppercase form) the lowercase form is used instead.
   *
   * @param string $str      The input string.
   * @param string $encoding Character encoding handed to the case conversion methods.
   *
   * @return string each character's case swapped
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    $swapCharacter = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      if ($char == $upper) {
        return UTF8::strtolower($char, $encoding);
      }

      return $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapCharacter, self::clean($input));
  }
5766
5767
  /**
   * Alias for "UTF8::to_ascii()".
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Substitution character, passed through to to_ascii().
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
5779
5780
  /**
   * Alias for "UTF8::to_latin1()".
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
5791
5792
  /**
   * Alias for "UTF8::to_utf8()".
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
5803
5804
  /**
   * Convert a string to ASCII.
   *
   * Input without bytes >= 0x80 is returned as-is (after self::clean()). Otherwise the string
   * is NFKC-normalized and every multi-byte character is transliterated on its own: first
   * with iconv(), then with the "translit_extra" data set, then via the first character of
   * its NFD decomposition, and finally with self::str_transliterate().
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Character used when no ASCII replacement is found.
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // nothing to do for plain 7-bit input
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = Normalizer::normalize($s, Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // The "s" modifier makes "." match line breaks too. Without it, every "\n" is missing
    // from the match list and therefore silently dropped from the result.
    preg_match_all('/./us', $s, $s);

    /** @noinspection AlterInForeachInspection */
    foreach ($s[0] as &$c) {

      // single byte characters are ASCII already
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if (is_string($t)) {
          if (!isset($t[0])) {
            $t = '?';
          } elseif (isset($t[1])) {
            // strip the accent characters that some iconv implementations put in front
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      if (!is_string($t)) {
        // iconv() failed: handle the character as "unknown", so that the lookups below
        // still get a chance instead of the character being dropped
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = Normalizer::normalize($c, Normalizer::NFD);

          // keep the base character of the decomposition, if it is ASCII
          if (is_string($t) && isset($t[0]) && $t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    unset($c);

    return implode('', $s[0]);
  }
5883
5884
  /**
   * Alias for "UTF8::to_win1252()".
   *
   * @param   string|array $str
   *
   * @return  array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
5895
5896
  /**
   * Alias for "UTF8::to_win1252()".
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
5907
5908
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte-level conversion, "\uXXXX" escape sequences and decimal HTML entities with
   * two to four digits ("&#252;") are decoded as well.
   *
   * @param string|array $str Any string or array; arrays are converted element by element.
   *
   * @return string|array The same string (or array), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = self::strlen($str, '8bit');

    $buf = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // number of continuation bytes a UTF-8 sequence with this lead byte needs
        if ($c1 <= "\xdf") {
          $need = 1; // looks like 2 bytes UTF8
        } elseif ($c1 <= "\xef") {
          $need = 2; // looks like 3 bytes UTF8
        } elseif ($c1 <= "\xf7") {
          $need = 3; // looks like 4 bytes UTF8
        } else {
          $need = 0; // doesn't look like UTF8, but should be converted
        }

        // the sequence is accepted only if all expected continuation bytes are present
        $seq = $c1;
        $isUtf8 = $need > 0;
        for ($j = 1; $isUtf8 && $j <= $need; $j++) {
          $next = ($i + $j < $max) ? $str[$i + $j] : "\x00";
          $isUtf8 = ($next >= "\x80" && $next <= "\xbf");
          $seq .= $next;
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= $seq;
          $i += $need;
        } else { // not valid UTF8 - convert the single byte as ISO 8859-1
          // integer shift instead of "/ 64": chr() must not be fed a float
          $buf .= (chr(ord($c1) >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= (chr($ordC1 >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode decimal HTML entities
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6035
6036
  /**
   * Convert a string into win1252.
   *
   * Arrays are converted element by element, strings go through self::utf8_decode(),
   * every other value is returned untouched.
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  protected static function to_win1252($str)
  {
    if (is_string($str)) {
      return self::utf8_decode($str);
    }

    if (!is_array($str)) {
      return $str;
    }

    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      $str[$key] = self::to_win1252($value);
    }

    return $str;
  }
6059
6060
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()".
   *
   * The native function could only be used for pure 7-bit strings / chars, but checking
   * for ASCII first would cost more time than it saves.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted (or empty),
   *                         Unicode separators (\pZ) and control characters (\pC) are stripped.
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    $useDefaultSet = ($chars === INF || !$chars);

    if ($useDefaultSet) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $input);
    }

    $leftTrimmed = self::ltrim($input, $chars);

    return self::rtrim($leftTrimmed, $chars);
  }
6088
6089
  /**
   * Makes string's first char uppercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $firstChar = self::substr($str, 0, 1);
    $upperFirst = self::strtoupper($firstChar);

    return $upperFirst . self::substr($str, 1);
  }
6100
6101
  /**
   * Alias for "UTF8::ucfirst()".
   *
   * @param string $str
   *
   * @return string
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
6112
6113
  /**
   * Uppercase the first character of all words in the string.
   *
   * Words are separated by a single space character. Words listed in $exceptions are left
   * untouched; the first character of the whole result is uppercased in any case.
   *
   * @param  string $str        The input string.
   * @param array   $exceptions Words that must not be changed (compared strictly).
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    if (!$str) {
      return '';
    }

    $hasExceptions = count($exceptions) > 0;
    $converted = array();

    foreach (explode(' ', $str) as $word) {
      $isException = $hasExceptions && in_array($word, $exceptions, true);

      $converted[] = $isException ? $word : self::ucfirst($word);
    }

    return self::ucfirst(implode(' ', $converted));
  }
6154
6155
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // ENT_HTML5 only exists since PHP 5.4
    $flags = Bootup::is_php('5.4') ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    // url-decode once and turn "%uXXXX" sequences into hexadecimal HTML entities
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', urldecode($input));

    $asUtf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($asUtf8, $flags);
    $fixed = self::fix_simple_utf8(rawurldecode($withoutEntities));

    return (string)$fixed;
  }
6195
6196
  /**
6197
   * Return a array with "urlencoded"-win1252 -> UTF-8
6198
   *
6199
   * @return mixed
6200
   */
6201
  protected static function urldecode_fix_win1252_chars()
6202
  {
6203
    static $array = array(
6204
        '%20' => ' ',
6205
        '%21' => '!',
6206
        '%22' => '"',
6207
        '%23' => '#',
6208
        '%24' => '$',
6209
        '%25' => '%',
6210
        '%26' => '&',
6211
        '%27' => "'",
6212
        '%28' => '(',
6213
        '%29' => ')',
6214
        '%2A' => '*',
6215
        '%2B' => '+',
6216
        '%2C' => ',',
6217
        '%2D' => '-',
6218
        '%2E' => '.',
6219
        '%2F' => '/',
6220
        '%30' => '0',
6221
        '%31' => '1',
6222
        '%32' => '2',
6223
        '%33' => '3',
6224
        '%34' => '4',
6225
        '%35' => '5',
6226
        '%36' => '6',
6227
        '%37' => '7',
6228
        '%38' => '8',
6229
        '%39' => '9',
6230
        '%3A' => ':',
6231
        '%3B' => ';',
6232
        '%3C' => '<',
6233
        '%3D' => '=',
6234
        '%3E' => '>',
6235
        '%3F' => '?',
6236
        '%40' => '@',
6237
        '%41' => 'A',
6238
        '%42' => 'B',
6239
        '%43' => 'C',
6240
        '%44' => 'D',
6241
        '%45' => 'E',
6242
        '%46' => 'F',
6243
        '%47' => 'G',
6244
        '%48' => 'H',
6245
        '%49' => 'I',
6246
        '%4A' => 'J',
6247
        '%4B' => 'K',
6248
        '%4C' => 'L',
6249
        '%4D' => 'M',
6250
        '%4E' => 'N',
6251
        '%4F' => 'O',
6252
        '%50' => 'P',
6253
        '%51' => 'Q',
6254
        '%52' => 'R',
6255
        '%53' => 'S',
6256
        '%54' => 'T',
6257
        '%55' => 'U',
6258
        '%56' => 'V',
6259
        '%57' => 'W',
6260
        '%58' => 'X',
6261
        '%59' => 'Y',
6262
        '%5A' => 'Z',
6263
        '%5B' => '[',
6264
        '%5C' => '\\',
6265
        '%5D' => ']',
6266
        '%5E' => '^',
6267
        '%5F' => '_',
6268
        '%60' => '`',
6269
        '%61' => 'a',
6270
        '%62' => 'b',
6271
        '%63' => 'c',
6272
        '%64' => 'd',
6273
        '%65' => 'e',
6274
        '%66' => 'f',
6275
        '%67' => 'g',
6276
        '%68' => 'h',
6277
        '%69' => 'i',
6278
        '%6A' => 'j',
6279
        '%6B' => 'k',
6280
        '%6C' => 'l',
6281
        '%6D' => 'm',
6282
        '%6E' => 'n',
6283
        '%6F' => 'o',
6284
        '%70' => 'p',
6285
        '%71' => 'q',
6286
        '%72' => 'r',
6287
        '%73' => 's',
6288
        '%74' => 't',
6289
        '%75' => 'u',
6290
        '%76' => 'v',
6291
        '%77' => 'w',
6292
        '%78' => 'x',
6293
        '%79' => 'y',
6294
        '%7A' => 'z',
6295
        '%7B' => '{',
6296
        '%7C' => '|',
6297
        '%7D' => '}',
6298
        '%7E' => '~',
6299
        '%7F' => '',
6300
        '%80' => '`',
6301
        '%81' => '',
6302
        '%82' => '‚',
6303
        '%83' => 'ƒ',
6304
        '%84' => '„',
6305
        '%85' => '…',
6306
        '%86' => '†',
6307
        '%87' => '‡',
6308
        '%88' => 'ˆ',
6309
        '%89' => '‰',
6310
        '%8A' => 'Š',
6311
        '%8B' => '‹',
6312
        '%8C' => 'Œ',
6313
        '%8D' => '',
6314
        '%8E' => 'Ž',
6315
        '%8F' => '',
6316
        '%90' => '',
6317
        '%91' => '‘',
6318
        '%92' => '’',
6319
        '%93' => '“',
6320
        '%94' => '”',
6321
        '%95' => '•',
6322
        '%96' => '–',
6323
        '%97' => '—',
6324
        '%98' => '˜',
6325
        '%99' => '™',
6326
        '%9A' => 'š',
6327
        '%9B' => '›',
6328
        '%9C' => 'œ',
6329
        '%9D' => '',
6330
        '%9E' => 'ž',
6331
        '%9F' => 'Ÿ',
6332
        '%A0' => '',
6333
        '%A1' => '¡',
6334
        '%A2' => '¢',
6335
        '%A3' => '£',
6336
        '%A4' => '¤',
6337
        '%A5' => '¥',
6338
        '%A6' => '¦',
6339
        '%A7' => '§',
6340
        '%A8' => '¨',
6341
        '%A9' => '©',
6342
        '%AA' => 'ª',
6343
        '%AB' => '«',
6344
        '%AC' => '¬',
6345
        '%AD' => '',
6346
        '%AE' => '®',
6347
        '%AF' => '¯',
6348
        '%B0' => '°',
6349
        '%B1' => '±',
6350
        '%B2' => '²',
6351
        '%B3' => '³',
6352
        '%B4' => '´',
6353
        '%B5' => 'µ',
6354
        '%B6' => '¶',
6355
        '%B7' => '·',
6356
        '%B8' => '¸',
6357
        '%B9' => '¹',
6358
        '%BA' => 'º',
6359
        '%BB' => '»',
6360
        '%BC' => '¼',
6361
        '%BD' => '½',
6362
        '%BE' => '¾',
6363
        '%BF' => '¿',
6364
        '%C0' => 'À',
6365
        '%C1' => 'Á',
6366
        '%C2' => 'Â',
6367
        '%C3' => 'Ã',
6368
        '%C4' => 'Ä',
6369
        '%C5' => 'Å',
6370
        '%C6' => 'Æ',
6371
        '%C7' => 'Ç',
6372
        '%C8' => 'È',
6373
        '%C9' => 'É',
6374
        '%CA' => 'Ê',
6375
        '%CB' => 'Ë',
6376
        '%CC' => 'Ì',
6377
        '%CD' => 'Í',
6378
        '%CE' => 'Î',
6379
        '%CF' => 'Ï',
6380
        '%D0' => 'Ð',
6381
        '%D1' => 'Ñ',
6382
        '%D2' => 'Ò',
6383
        '%D3' => 'Ó',
6384
        '%D4' => 'Ô',
6385
        '%D5' => 'Õ',
6386
        '%D6' => 'Ö',
6387
        '%D7' => '×',
6388
        '%D8' => 'Ø',
6389
        '%D9' => 'Ù',
6390
        '%DA' => 'Ú',
6391
        '%DB' => 'Û',
6392
        '%DC' => 'Ü',
6393
        '%DD' => 'Ý',
6394
        '%DE' => 'Þ',
6395
        '%DF' => 'ß',
6396
        '%E0' => 'à',
6397
        '%E1' => 'á',
6398
        '%E2' => 'â',
6399
        '%E3' => 'ã',
6400
        '%E4' => 'ä',
6401
        '%E5' => 'å',
6402
        '%E6' => 'æ',
6403
        '%E7' => 'ç',
6404
        '%E8' => 'è',
6405
        '%E9' => 'é',
6406
        '%EA' => 'ê',
6407
        '%EB' => 'ë',
6408
        '%EC' => 'ì',
6409
        '%ED' => 'í',
6410
        '%EE' => 'î',
6411
        '%EF' => 'ï',
6412
        '%F0' => 'ð',
6413
        '%F1' => 'ñ',
6414
        '%F2' => 'ò',
6415
        '%F3' => 'ó',
6416
        '%F4' => 'ô',
6417
        '%F5' => 'õ',
6418
        '%F6' => 'ö',
6419
        '%F7' => '÷',
6420
        '%F8' => 'ø',
6421
        '%F9' => 'ù',
6422
        '%FA' => 'ú',
6423
        '%FB' => 'û',
6424
        '%FC' => 'ü',
6425
        '%FD' => 'ý',
6426
        '%FE' => 'þ',
6427
        '%FF' => 'ÿ',
6428
    );
6429
6430
    return $array;
6431
  }
6432
6433
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is cast to string, normalised through self::to_utf8(), then every
   * key of self::$utf8ToWin1252 is replaced by its mapped value before the
   * result is handed to Xml::utf8_decode().
   *
   * @param string $str The string to decode.
   *
   * @return string The decoded string, or an empty string for empty input.
   */
  public static function utf8_decode($str)
  {
    // The key/value lists of the mapping table are derived once and reused on later calls.
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($str);

    if (null === $search) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $utf8);

    return Xml::utf8_decode($mapped);
  }
6463
6464
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The native utf8_encode() does the conversion; when the result contains a
   * "\xC2" byte, every key of self::$cp1252ToUtf8 is additionally replaced by
   * its mapped value.
   *
   * NOTE(review): the native utf8_encode() is deprecated as of PHP 8.2.
   *
   * @param string $str The string to encode.
   *
   * @return string The encoded string.
   */
  public static function utf8_encode($str)
  {
    // The key/value lists of the mapping table are derived once and reused on later calls.
    static $search = null;
    static $replace = null;

    $encoded = utf8_encode($str);

    // Without a "\xC2" byte the table replacement is skipped entirely.
    // NOTE(review): presumably every key of $cp1252ToUtf8 starts with "\xC2" - confirm against the table.
    if (false === strpos($encoded, "\xC2")) {
      return $encoded;
    }

    if (null === $search) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6490
6491
  /**
   * Fix UTF-8 text that contains wrongly converted Windows-1252 characters.
   *
   * Use this if you received an UTF-8 string that was converted from Windows-1252
   * as if it were ISO-8859-1 (ignoring the Windows-1252 characters from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to self::fix_simple_utf8().
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str The string to repair.
   *
   * @return  string Whatever self::fix_simple_utf8() returns for the given string.
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6508
6509
  /**
   * Returns the table of all UTF-8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array The content of self::$whitespaceTable: all known whitespace characters as values
   *         and the type of whitespace as keys, as defined in the URL above.
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6523
6524
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. When the string holds more
   * than $words words, it is cut after the last allowed word, trailing
   * whitespace is removed and $strAddOn is appended. Otherwise the string is
   * returned unchanged.
   *
   * @param  string $str      The input string (cast to string internally).
   * @param  int    $words    Maximum number of words to keep; values below 1 yield an empty string.
   * @param  string $strAddOn Suffix appended only when the string was actually shortened.
   *
   * @return string The (possibly shortened) string; '' for empty input or a limit below 1.
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    // Cast first, so that scalar input (e.g. an integer) is handled as text
    // instead of failing the offset check below.
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;

    // A limit below 1 would build the quantifier "{1,0}", which PCRE rejects
    // (negative values are read as literal text), so the input would come back
    // untruncated. Keeping zero words means an empty result.
    if ($words < 1) {
      return '';
    }

    preg_match('/^\s*+(?:\S++\s*+){1,' . $words . '}/u', $str, $matches);

    if (
        !isset($matches[0])
        ||
        self::strlen($str) === self::strlen($matches[0])
    ) {
      // Either nothing matched or the match already covers the whole string: nothing to cut.
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
6553
6554
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is turned into a one-byte-per-character mask ("?" for a
   * character, " " for a space, "#" for an existing break) so that the native
   * wordwrap() can compute the break positions; the positions found in the
   * wrapped mask are then applied to the list of real characters.
   *
   * @param string $str   The input string.
   * @param int    $width The number of characters at which the string will be wrapped.
   * @param string $break The line-break sequence.
   * @param bool   $cut   Passed through to the native wordwrap() as its "cut" flag.
   *
   * @return false|string Returns the given string wrapped at the specified length, or false
   *                      when the native wordwrap() rejects the given arguments.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    // Let the native function validate $width / $break / $cut on a dummy string.
    if (false === wordwrap('-', $width, $break, $cut)) {
      return false;
    }

    $segments = explode($break, $str);

    if (1 === count($segments) && '' === $segments[0]) {
      return '';
    }

    // Build the character list and the matching mask in parallel.
    $glyphs = array();
    $mask = '';

    foreach ($segments as $position => $segment) {

      if ($position) {
        // An existing break is kept as a single list entry and marked with "#".
        $glyphs[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $mask .= ' ' === $glyph ? ' ' : '?';
      }
    }

    $wrappedMask = wordwrap($mask, $width, '#', $cut);

    $result = '';
    $glyphIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== ($breakPos = self::strpos($wrappedMask, '#', $breakPos + 1))) {

      // Copy every character that sits in front of the current break marker.
      ++$maskIndex;
      while ($maskIndex < $breakPos) {
        $result .= $glyphs[$glyphIndex];
        unset($glyphs[$glyphIndex]);
        ++$glyphIndex;
        ++$maskIndex;
      }

      // A marker that took the place of a space or of an original break consumes
      // that entry; a marker inserted in the middle of a word consumes nothing.
      if ($break === $glyphs[$glyphIndex] || ' ' === $glyphs[$glyphIndex]) {
        unset($glyphs[$glyphIndex]);
        ++$glyphIndex;
      }

      $result .= $break;
    }

    // Whatever is left belongs to the last line.
    return $result . implode('', $glyphs);
  }
6620
6621
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return   array The content of self::$whitespace: an array with the numeric code point as key
   *                 and the White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6630
6631
}
6632