Completed
Push — master ( ae956f...05812d )
by Lars
15:36 queued 12:04
created

UTF8::json_encode()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 12
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2.0116

Importance

Changes 5
Bugs 2 Features 1
Metric Value
c 5
b 2
f 1
dl 0
loc 12
ccs 6
cts 7
cp 0.8571
rs 9.4285
cc 2
eloc 7
nc 2
nop 3
crap 2.0116
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Intl\Normalizer\Normalizer;
7
use Symfony\Polyfill\Xml\Xml;
8
9
/**
10
 * UTF8-Helper-Class
11
 *
12
 * @package voku\helper
13
 */
14
class UTF8
15
{
16
  /**
   * Windows-1252 byte value => UTF-8 byte sequence.
   *
   * Covers the byte values in the 128-159 range that Windows-1252 assigns
   * to printable characters; the values with no entry here (129, 141, 143,
   * 144, 157) are simply absent from the table.
   *
   * @var array
   */
  protected static $win1252ToUtf8 = array(
      128 => "\xe2\x82\xac", // EURO SIGN
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
      134 => "\xe2\x80\xa0", // DAGGER
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
      149 => "\xe2\x80\xa2", // BULLET
      150 => "\xe2\x80\x93", // EN DASH
      151 => "\xe2\x80\x94", // EM DASH
      152 => "\xcb\x9c", // SMALL TILDE
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
  );
48
49
  /**
   * Mis-decoded Windows-1252 character => intended character.
   *
   * Each key is the UTF-8 encoding of a C1 code point (U+0080-U+009F), i.e.
   * what a Windows-1252 byte in the 0x80-0x9F range turns into when the
   * text was wrongly decoded as ISO-8859-1. Each value is the character
   * Windows-1252 actually assigns to that byte.
   *
   * Keys are written as byte escapes so they survive editors and tools that
   * normalise or "repair" control characters.
   *
   * NOTE(review): the keys were reconstructed. In the text this was taken
   * from, every entry mapped a character to itself (a no-op). The
   * reconstruction follows the "\xc2\x80".."\xc2\x9f" keys of
   * $brokenUtf8ToUtf8; confirm against the upstream file.
   *
   * @var array
   */
  protected static $cp1252ToUtf8 = array(
      "\xc2\x80" => '€',
      "\xc2\x82" => '‚',
      "\xc2\x83" => 'ƒ',
      "\xc2\x84" => '„',
      "\xc2\x85" => '…',
      "\xc2\x86" => '†',
      "\xc2\x87" => '‡',
      "\xc2\x88" => 'ˆ',
      "\xc2\x89" => '‰',
      "\xc2\x8a" => 'Š',
      "\xc2\x8b" => '‹',
      "\xc2\x8c" => 'Œ',
      "\xc2\x8e" => 'Ž',
      "\xc2\x91" => '‘',
      "\xc2\x92" => '’',
      "\xc2\x93" => '“',
      "\xc2\x94" => '”',
      "\xc2\x95" => '•',
      "\xc2\x96" => '–',
      "\xc2\x97" => '—',
      "\xc2\x98" => '˜',
      "\xc2\x99" => '™',
      "\xc2\x9a" => 'š',
      "\xc2\x9b" => '›',
      "\xc2\x9c" => 'œ',
      "\xc2\x9e" => 'ž',
      "\xc2\x9f" => 'Ÿ',
  );
81
82
  /**
   * Numeric code point => UTF-8 character, for whitespace-like characters.
   *
   * Besides the Unicode space characters, the table also lists NUL, tab,
   * new line, vertical tab and carriage return.
   *
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
   *
   * @var array
   */
  protected static $whitespace = array(
      // NUL Byte
      0     => "\x0",
      // Tab
      9     => "\x9",
      // New Line
      10    => "\xa",
      // Vertical Tab
      11    => "\xb",
      // Carriage Return
      13    => "\xd",
      // Ordinary Space
      32    => "\x20",
      // NO-BREAK SPACE
      160   => "\xc2\xa0",
      // OGHAM SPACE MARK
      5760  => "\xe1\x9a\x80",
      // MONGOLIAN VOWEL SEPARATOR
      6158  => "\xe1\xa0\x8e",
      // EN QUAD
      8192  => "\xe2\x80\x80",
      // EM QUAD
      8193  => "\xe2\x80\x81",
      // EN SPACE
      8194  => "\xe2\x80\x82",
      // EM SPACE
      8195  => "\xe2\x80\x83",
      // THREE-PER-EM SPACE
      8196  => "\xe2\x80\x84",
      // FOUR-PER-EM SPACE
      8197  => "\xe2\x80\x85",
      // SIX-PER-EM SPACE
      8198  => "\xe2\x80\x86",
      // FIGURE SPACE
      8199  => "\xe2\x80\x87",
      // PUNCTUATION SPACE
      8200  => "\xe2\x80\x88",
      // THIN SPACE
      8201  => "\xe2\x80\x89",
      // HAIR SPACE
      8202  => "\xe2\x80\x8a",
      // LINE SEPARATOR
      8232  => "\xe2\x80\xa8",
      // PARAGRAPH SEPARATOR
      8233  => "\xe2\x80\xa9",
      // NARROW NO-BREAK SPACE
      8239  => "\xe2\x80\xaf",
      // MEDIUM MATHEMATICAL SPACE
      8287  => "\xe2\x81\x9f",
      // IDEOGRAPHIC SPACE
      12288 => "\xe3\x80\x80",
  );
141
142
  /**
   * Unicode character name => UTF-8 character, for space characters.
   *
   * Unlike $whitespace this table is keyed by name, contains no ASCII
   * control characters, and includes ZERO WIDTH SPACE.
   *
   * @var array
   */
  protected static $whitespaceTable = array(
      'SPACE'                     => "\x20",
      'NO-BREAK SPACE'            => "\xc2\xa0",
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
      'EN QUAD'                   => "\xe2\x80\x80",
      'EM QUAD'                   => "\xe2\x80\x81",
      'EN SPACE'                  => "\xe2\x80\x82",
      'EM SPACE'                  => "\xe2\x80\x83",
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
      'FIGURE SPACE'              => "\xe2\x80\x87",
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
      'THIN SPACE'                => "\xe2\x80\x89",
      'HAIR SPACE'                => "\xe2\x80\x8a",
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
  );
167
168
  /**
   * Bidirectional text control characters: numeric code point => UTF-8 character.
   *
   * The per-entry comments name the HTML markup that the W3C article below
   * recommends instead of the raw control character.
   *
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
   *
   * @var array
   */
  protected static $bidiUniCodeControlsTable = array(
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
    8234 => "\xE2\x80\xAA",
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
    8235 => "\xE2\x80\xAB",
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
    8236 => "\xE2\x80\xAC",
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
    8237 => "\xE2\x80\xAD",
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
    8238 => "\xE2\x80\xAE",
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
    8294 => "\xE2\x81\xA6",
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
    8295 => "\xE2\x81\xA7",
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
    8296 => "\xE2\x81\xA8",
    // POP DIRECTIONAL ISOLATE
    8297 => "\xE2\x81\xA9",
  );
195
196
  /**
   * Case-folding special cases: character => folded replacement.
   *
   * Maps variant letter forms (long s, final sigma, the Greek symbol
   * variants, ...) to the ordinary lower-case letter they fold to.
   *
   * @var array
   */
  protected static $commonCaseFold = array(
      'ſ'            => 's',
      "\xCD\x85"     => 'ι',
      'ς'            => 'σ',
      "\xCF\x90"     => 'β',
      "\xCF\x91"     => 'θ',
      "\xCF\x95"     => 'φ',
      "\xCF\x96"     => 'π',
      "\xCF\xB0"     => 'κ',
      "\xCF\xB1"     => 'ρ',
      "\xCF\xB5"     => 'ε',
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
      "\xE1\xBE\xBE" => 'ι',
  );
213
214
  /**
   * Broken (mis-decoded / double-encoded) UTF-8 sequence => correct UTF-8.
   *
   * The table has two groups:
   *
   * 1. "\xc2\x80".."\xc2\x9f": C1 code points, produced when a Windows-1252
   *    byte was decoded as ISO-8859-1, mapped to the character
   *    Windows-1252 assigns to that byte.
   * 2. Double-encoded characters: the UTF-8 bytes of the target character
   *    were decoded as Windows-1252 and encoded as UTF-8 again.
   *
   * The keys of the second group are written as byte escapes so that
   * editors and encoding-repair tools cannot collapse them into the target
   * character, which would turn the entry into a no-op.
   *
   * NOTE(review): where the second byte has no Windows-1252 assignment
   * (0x81, 0x8d, 0x8f; affects the keys for 'Á', 'Í', 'Ï'), the key uses
   * the matching C1 code point. Confirm against the upstream file.
   *
   * @var array
   */
  protected static $brokenUtf8ToUtf8 = array(
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
      "\xc3\x83\xc2\xbc"         => 'ü',
      "\xc3\x83\xc2\xa4"         => 'ä',
      "\xc3\x83\xc2\xb6"         => 'ö',
      "\xc3\x83\xe2\x80\x93"     => 'Ö',
      "\xc3\x83\xc5\xb8"         => 'ß',
      "\xc3\x83\xc2\xa0"         => 'à',
      "\xc3\x83\xc2\xa1"         => 'á',
      "\xc3\x83\xc2\xa2"         => 'â',
      "\xc3\x83\xc2\xa3"         => 'ã',
      "\xc3\x83\xc2\xb9"         => 'ù',
      "\xc3\x83\xc2\xba"         => 'ú',
      "\xc3\x83\xc2\xbb"         => 'û',
      "\xc3\x83\xe2\x84\xa2"     => 'Ù',
      "\xc3\x83\xc5\xa1"         => 'Ú',
      "\xc3\x83\xe2\x80\xba"     => 'Û',
      "\xc3\x83\xc5\x93"         => 'Ü',
      "\xc3\x83\xc2\xb2"         => 'ò',
      "\xc3\x83\xc2\xb3"         => 'ó',
      "\xc3\x83\xc2\xb4"         => 'ô',
      "\xc3\x83\xc2\xa8"         => 'è',
      "\xc3\x83\xc2\xa9"         => 'é',
      "\xc3\x83\xc2\xaa"         => 'ê',
      "\xc3\x83\xc2\xab"         => 'ë',
      "\xc3\x83\xe2\x82\xac"     => 'À',
      "\xc3\x83\xc2\x81"         => 'Á',
      "\xc3\x83\xe2\x80\x9a"     => 'Â',
      "\xc3\x83\xc6\x92"         => 'Ã',
      "\xc3\x83\xe2\x80\x9e"     => 'Ä',
      "\xc3\x83\xe2\x80\xa6"     => 'Å',
      "\xc3\x83\xe2\x80\xa1"     => 'Ç',
      "\xc3\x83\xcb\x86"         => 'È',
      "\xc3\x83\xe2\x80\xb0"     => 'É',
      "\xc3\x83\xc5\xa0"         => 'Ê',
      "\xc3\x83\xe2\x80\xb9"     => 'Ë',
      "\xc3\x83\xc5\x92"         => 'Ì',
      "\xc3\x83\xc2\x8d"         => 'Í',
      "\xc3\x83\xc5\xbd"         => 'Î',
      "\xc3\x83\xc2\x8f"         => 'Ï',
      "\xc3\x83\xe2\x80\x98"     => 'Ñ',
      "\xc3\x83\xe2\x80\x99"     => 'Ò',
      "\xc3\x83\xe2\x80\x9c"     => 'Ó',
      "\xc3\x83\xe2\x80\x9d"     => 'Ô',
      "\xc3\x83\xe2\x80\xa2"     => 'Õ',
      "\xc3\x83\xcb\x9c"         => 'Ø',
      "\xc3\x83\xc2\xa5"         => 'å',
      "\xc3\x83\xc2\xa6"         => 'æ',
      "\xc3\x83\xc2\xa7"         => 'ç',
      "\xc3\x83\xc2\xac"         => 'ì',
      "\xc3\x83\xc2\xad"         => 'í',
      "\xc3\x83\xc2\xae"         => 'î',
      "\xc3\x83\xc2\xaf"         => 'ï',
      "\xc3\x83\xc2\xb0"         => 'ð',
      "\xc3\x83\xc2\xb1"         => 'ñ',
      "\xc3\x83\xc2\xb5"         => 'õ',
      "\xc3\x83\xc2\xb8"         => 'ø',
      "\xc3\x83\xc2\xbd"         => 'ý',
      "\xc3\x83\xc2\xbf"         => 'ÿ',
      "\xc3\xa2\xe2\x80\x9a\xc2\xac" => '€',
  );
304
305
  /**
   * UTF-8 byte sequence => single Windows-1252 byte.
   *
   * Holds the same character set as $win1252ToUtf8, in the opposite
   * direction and with the byte written as a one-byte string rather than
   * an integer.
   *
   * @var array
   */
  protected static $utf8ToWin1252 = array(
      "\xe2\x82\xac" => "\x80", // EURO SIGN
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
      "\xe2\x80\xa0" => "\x86", // DAGGER
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
      "\xe2\x80\xa2" => "\x95", // BULLET
      "\xe2\x80\x93" => "\x96", // EN DASH
      "\xe2\x80\x94" => "\x97", // EM DASH
      "\xcb\x9c"     => "\x98", // SMALL TILDE
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
  );
337
338
  /**
   * Typographic punctuation (UTF-8) => plain ASCII replacement.
   *
   * Curly and angle quotes become straight quotes, en/em dashes become a
   * hyphen-minus, and the horizontal ellipsis becomes three dots.
   *
   * @var array
   */
  protected static $utf8MSWord = array(
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
  );
358
359
  /**
   * Per-class cache, empty until something fills it.
   *
   * NOTE(review): presumably populated by checkForSupport(); the code that
   * writes to it is outside the visible part of the file, so confirm there.
   *
   * @var array
   */
  private static $support = array();
363
364
  /**
   * Runs the static support check whenever an instance is created.
   *
   * All work is delegated to self::checkForSupport(), which is defined
   * elsewhere in this class.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
371
372
  /**
   * Returns a single UTF-8 character from a string.
   *
   * Offers $str[$pos]-like access for multi-byte strings by delegating to
   * self::substr() with a length of 1.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of the character to return.
   *
   * @return   string Single multi-byte character.
   */
  public static function access($str, $pos)
  {
    return self::substr($str, $pos, 1);
  }
386
387
  /**
   * Prepends the BOM to the string and returns the whole string.
   *
   * INFO: If the first three bytes are already recognised as a BOM by
   * self::is_bom(), the input string is returned unchanged.
   *
   * @param    string $str The input string.
   *
   * @return   string The output string, starting with the BOM.
   */
  public static function add_bom_to_string($str)
  {
    // The BOM returned by self::bom() is three bytes long, so only the
    // first three bytes of the input are inspected.
    if (!self::is_bom(substr($str, 0, 3))) {
      $str = self::bom() . $str;
    }

    return $str;
  }
404
405
  /**
   * Returns the Byte Order Mark character.
   *
   * @return   string The three bytes EF BB BF (U+FEFF encoded as UTF-8).
   */
  public static function bom()
  {
    return "\xEF\xBB\xBF";
  }
414
415
  /**
   * Alias of UTF8::chr_map().
   *
   * Both arguments are passed through unchanged, in the same order.
   *
   * @see UTF8::chr_map()
   *
   * @param mixed $callback Forwarded as the first argument of chr_map().
   * @param mixed $str      Forwarded as the second argument of chr_map().
   *
   * @return array Whatever UTF8::chr_map() returns.
   */
  public static function callback($callback, $str)
  {
    return self::chr_map($callback, $str);
  }
427
428
  /**
429
   * Returns an array of all lower and upper case UTF-8 encoded characters.
430
   *
431
   * @return   string An array with lower case chars as keys and upper chars as values.
432
   */
433
  protected static function case_table()
434
  {
435
    static $case = array(
436
437
      // lower => upper
438
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
439
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
440
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
441
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
442
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
443
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
444
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
445
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
446
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
447
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
448
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
449
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
450
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
451
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
452
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
453
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
454
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
455
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
456
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
457
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
458
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
459
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
460
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
461
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
462
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
463
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
464
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
465
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
466
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
467
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
468
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
469
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
470
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
471
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
472
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
473
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
474
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
475
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
476
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
477
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
478
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
479
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
480
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
481
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
482
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
483
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
484
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
485
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
486
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
487
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
488
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
489
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
490
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
491
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
492
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
493
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
494
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
495
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
496
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
497
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
498
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
499
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
500
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
501
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
502
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
503
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
504
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
505
      "\xea\x9e\x87"     => "\xea\x9e\x86",
506
      "\xea\x9e\x85"     => "\xea\x9e\x84",
507
      "\xea\x9e\x83"     => "\xea\x9e\x82",
508
      "\xea\x9e\x81"     => "\xea\x9e\x80",
509
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
510
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
511
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
512
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
513
      "\xea\x9d\xad"     => "\xea\x9d\xac",
514
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
515
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
516
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
517
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
518
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
519
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
520
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
521
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
522
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
523
      "\xea\x9d\x99"     => "\xea\x9d\x98",
524
      "\xea\x9d\x97"     => "\xea\x9d\x96",
525
      "\xea\x9d\x95"     => "\xea\x9d\x94",
526
      "\xea\x9d\x93"     => "\xea\x9d\x92",
527
      "\xea\x9d\x91"     => "\xea\x9d\x90",
528
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
529
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
530
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
531
      "\xea\x9d\x89"     => "\xea\x9d\x88",
532
      "\xea\x9d\x87"     => "\xea\x9d\x86",
533
      "\xea\x9d\x85"     => "\xea\x9d\x84",
534
      "\xea\x9d\x83"     => "\xea\x9d\x82",
535
      "\xea\x9d\x81"     => "\xea\x9d\x80",
536
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
537
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
538
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
539
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
540
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
541
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
542
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
543
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
544
      "\xea\x9c\xad"     => "\xea\x9c\xac",
545
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
546
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
547
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
548
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
549
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
550
      "\xea\x9a\x97"     => "\xea\x9a\x96",
551
      "\xea\x9a\x95"     => "\xea\x9a\x94",
552
      "\xea\x9a\x93"     => "\xea\x9a\x92",
553
      "\xea\x9a\x91"     => "\xea\x9a\x90",
554
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
555
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
556
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
557
      "\xea\x9a\x89"     => "\xea\x9a\x88",
558
      "\xea\x9a\x87"     => "\xea\x9a\x86",
559
      "\xea\x9a\x85"     => "\xea\x9a\x84",
560
      "\xea\x9a\x83"     => "\xea\x9a\x82",
561
      "\xea\x9a\x81"     => "\xea\x9a\x80",
562
      "\xea\x99\xad"     => "\xea\x99\xac",
563
      "\xea\x99\xab"     => "\xea\x99\xaa",
564
      "\xea\x99\xa9"     => "\xea\x99\xa8",
565
      "\xea\x99\xa7"     => "\xea\x99\xa6",
566
      "\xea\x99\xa5"     => "\xea\x99\xa4",
567
      "\xea\x99\xa3"     => "\xea\x99\xa2",
568
      "\xea\x99\x9f"     => "\xea\x99\x9e",
569
      "\xea\x99\x9d"     => "\xea\x99\x9c",
570
      "\xea\x99\x9b"     => "\xea\x99\x9a",
571
      "\xea\x99\x99"     => "\xea\x99\x98",
572
      "\xea\x99\x97"     => "\xea\x99\x96",
573
      "\xea\x99\x95"     => "\xea\x99\x94",
574
      "\xea\x99\x93"     => "\xea\x99\x92",
575
      "\xea\x99\x91"     => "\xea\x99\x90",
576
      "\xea\x99\x8f"     => "\xea\x99\x8e",
577
      "\xea\x99\x8d"     => "\xea\x99\x8c",
578
      "\xea\x99\x8b"     => "\xea\x99\x8a",
579
      "\xea\x99\x89"     => "\xea\x99\x88",
580
      "\xea\x99\x87"     => "\xea\x99\x86",
581
      "\xea\x99\x85"     => "\xea\x99\x84",
582
      "\xea\x99\x83"     => "\xea\x99\x82",
583
      "\xea\x99\x81"     => "\xea\x99\x80",
584
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
585
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
586
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
587
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
588
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
589
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
590
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
591
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
592
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
593
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
594
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
595
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
596
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
597
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
598
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
599
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
600
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
601
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
602
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
603
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
604
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
605
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
606
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
607
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
608
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
609
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
610
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
611
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
612
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
613
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
614
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
615
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
616
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
617
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
618
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
619
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
620
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
621
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
622
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
623
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
624
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
625
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
626
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
627
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
628
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
629
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
630
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
631
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
632
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
633
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
634
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
635
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
636
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
637
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
638
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
639
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
640
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
641
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
642
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
643
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
644
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
645
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
646
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
647
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
648
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
649
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
650
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
651
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
652
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
653
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
654
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
655
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
656
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
657
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
658
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
659
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
660
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
661
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
662
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
663
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
664
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
665
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
666
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
667
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
668
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
669
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
670
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
671
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
672
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
673
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
674
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
675
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
676
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
677
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
678
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
679
      "\xe2\xb1\xa6"     => "\xc8\xbe",
680
      "\xe2\xb1\xa5"     => "\xc8\xba",
681
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
682
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
683
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
684
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
685
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
686
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
687
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
688
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
689
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
690
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
691
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
692
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
693
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
694
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
695
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
696
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
697
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
698
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
699
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
700
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
701
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
702
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
703
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
704
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
705
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
706
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
707
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
708
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
709
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
710
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
711
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
712
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
713
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
714
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
715
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
716
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
717
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
718
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
719
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
720
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
721
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
722
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
723
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
724
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
725
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
726
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
727
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
728
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
729
      "\xe2\x86\x84"     => "\xe2\x86\x83",
730
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
731
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
732
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
733
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
734
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
735
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
736
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
737
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
738
      "\xe1\xbe\xbe"     => "\xce\x99",
739
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
740
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
741
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
742
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
743
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
744
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
745
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
746
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
747
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
748
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
749
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
750
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
751
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
752
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
753
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
754
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
755
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
756
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
757
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
758
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
759
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
760
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
761
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
762
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
763
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
764
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
765
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
766
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
767
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
768
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
769
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
770
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
771
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
772
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
773
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
774
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
775
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
776
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
777
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
778
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
779
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
780
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
781
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
782
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
783
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
784
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
785
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
786
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
787
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
788
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
789
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
790
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
791
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
792
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
793
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
794
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
795
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
796
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
797
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
798
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
799
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
800
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
801
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
802
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
803
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
804
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
805
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
806
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
807
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
808
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
809
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
810
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
811
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
812
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
813
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
814
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
815
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
816
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
817
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
818
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
819
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
820
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
821
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
822
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
823
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
824
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
825
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
826
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
827
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
828
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
829
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
830
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
831
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
832
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
833
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
834
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
835
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
836
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
837
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
838
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
839
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
840
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
841
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
842
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
843
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
844
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
845
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
846
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
847
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
848
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
849
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
850
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
851
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
852
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
853
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
854
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
855
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
856
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
857
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
858
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
859
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
860
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
861
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
862
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
863
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
864
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
865
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
866
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
867
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
868
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
869
      "\xe1\xba\xad"     => "\xe1\xba\xac",
870
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
871
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
872
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
873
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
874
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
875
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
876
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
877
      "\xe1\xba\x95"     => "\xe1\xba\x94",
878
      "\xe1\xba\x93"     => "\xe1\xba\x92",
879
      "\xe1\xba\x91"     => "\xe1\xba\x90",
880
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
881
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
882
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
883
      "\xe1\xba\x89"     => "\xe1\xba\x88",
884
      "\xe1\xba\x87"     => "\xe1\xba\x86",
885
      "\xe1\xba\x85"     => "\xe1\xba\x84",
886
      "\xe1\xba\x83"     => "\xe1\xba\x82",
887
      "\xe1\xba\x81"     => "\xe1\xba\x80",
888
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
889
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
890
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
891
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
892
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
893
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
894
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
895
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
896
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
897
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
898
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
899
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
900
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
901
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
902
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
903
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
904
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
905
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
906
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
907
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
908
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
909
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
910
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
911
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
912
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
913
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
914
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
915
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
916
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
917
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
918
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
919
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
920
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
921
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
922
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
923
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
924
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
925
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
926
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
927
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
928
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
929
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
930
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
931
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
932
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
933
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
934
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
935
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
936
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
937
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
938
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
939
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
940
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
941
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
942
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
943
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
944
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
945
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
946
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
947
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
948
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
949
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
950
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
951
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
952
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
953
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
954
      "\xd6\x86"         => "\xd5\x96",
955
      "\xd6\x85"         => "\xd5\x95",
956
      "\xd6\x84"         => "\xd5\x94",
957
      "\xd6\x83"         => "\xd5\x93",
958
      "\xd6\x82"         => "\xd5\x92",
959
      "\xd6\x81"         => "\xd5\x91",
960
      "\xd6\x80"         => "\xd5\x90",
961
      "\xd5\xbf"         => "\xd5\x8f",
962
      "\xd5\xbe"         => "\xd5\x8e",
963
      "\xd5\xbd"         => "\xd5\x8d",
964
      "\xd5\xbc"         => "\xd5\x8c",
965
      "\xd5\xbb"         => "\xd5\x8b",
966
      "\xd5\xba"         => "\xd5\x8a",
967
      "\xd5\xb9"         => "\xd5\x89",
968
      "\xd5\xb8"         => "\xd5\x88",
969
      "\xd5\xb7"         => "\xd5\x87",
970
      "\xd5\xb6"         => "\xd5\x86",
971
      "\xd5\xb5"         => "\xd5\x85",
972
      "\xd5\xb4"         => "\xd5\x84",
973
      "\xd5\xb3"         => "\xd5\x83",
974
      "\xd5\xb2"         => "\xd5\x82",
975
      "\xd5\xb1"         => "\xd5\x81",
976
      "\xd5\xb0"         => "\xd5\x80",
977
      "\xd5\xaf"         => "\xd4\xbf",
978
      "\xd5\xae"         => "\xd4\xbe",
979
      "\xd5\xad"         => "\xd4\xbd",
980
      "\xd5\xac"         => "\xd4\xbc",
981
      "\xd5\xab"         => "\xd4\xbb",
982
      "\xd5\xaa"         => "\xd4\xba",
983
      "\xd5\xa9"         => "\xd4\xb9",
984
      "\xd5\xa8"         => "\xd4\xb8",
985
      "\xd5\xa7"         => "\xd4\xb7",
986
      "\xd5\xa6"         => "\xd4\xb6",
987
      "\xd5\xa5"         => "\xd4\xb5",
988
      "\xd5\xa4"         => "\xd4\xb4",
989
      "\xd5\xa3"         => "\xd4\xb3",
990
      "\xd5\xa2"         => "\xd4\xb2",
991
      "\xd5\xa1"         => "\xd4\xb1",
992
      "\xd4\xa5"         => "\xd4\xa4",
993
      "\xd4\xa3"         => "\xd4\xa2",
994
      "\xd4\xa1"         => "\xd4\xa0",
995
      "\xd4\x9f"         => "\xd4\x9e",
996
      "\xd4\x9d"         => "\xd4\x9c",
997
      "\xd4\x9b"         => "\xd4\x9a",
998
      "\xd4\x99"         => "\xd4\x98",
999
      "\xd4\x97"         => "\xd4\x96",
1000
      "\xd4\x95"         => "\xd4\x94",
1001
      "\xd4\x93"         => "\xd4\x92",
1002
      "\xd4\x91"         => "\xd4\x90",
1003
      "\xd4\x8f"         => "\xd4\x8e",
1004
      "\xd4\x8d"         => "\xd4\x8c",
1005
      "\xd4\x8b"         => "\xd4\x8a",
1006
      "\xd4\x89"         => "\xd4\x88",
1007
      "\xd4\x87"         => "\xd4\x86",
1008
      "\xd4\x85"         => "\xd4\x84",
1009
      "\xd4\x83"         => "\xd4\x82",
1010
      "\xd4\x81"         => "\xd4\x80",
1011
      "\xd3\xbf"         => "\xd3\xbe",
1012
      "\xd3\xbd"         => "\xd3\xbc",
1013
      "\xd3\xbb"         => "\xd3\xba",
1014
      "\xd3\xb9"         => "\xd3\xb8",
1015
      "\xd3\xb7"         => "\xd3\xb6",
1016
      "\xd3\xb5"         => "\xd3\xb4",
1017
      "\xd3\xb3"         => "\xd3\xb2",
1018
      "\xd3\xb1"         => "\xd3\xb0",
1019
      "\xd3\xaf"         => "\xd3\xae",
1020
      "\xd3\xad"         => "\xd3\xac",
1021
      "\xd3\xab"         => "\xd3\xaa",
1022
      "\xd3\xa9"         => "\xd3\xa8",
1023
      "\xd3\xa7"         => "\xd3\xa6",
1024
      "\xd3\xa5"         => "\xd3\xa4",
1025
      "\xd3\xa3"         => "\xd3\xa2",
1026
      "\xd3\xa1"         => "\xd3\xa0",
1027
      "\xd3\x9f"         => "\xd3\x9e",
1028
      "\xd3\x9d"         => "\xd3\x9c",
1029
      "\xd3\x9b"         => "\xd3\x9a",
1030
      "\xd3\x99"         => "\xd3\x98",
1031
      "\xd3\x97"         => "\xd3\x96",
1032
      "\xd3\x95"         => "\xd3\x94",
1033
      "\xd3\x93"         => "\xd3\x92",
1034
      "\xd3\x91"         => "\xd3\x90",
1035
      "\xd3\x8f"         => "\xd3\x80",
1036
      "\xd3\x8e"         => "\xd3\x8d",
1037
      "\xd3\x8c"         => "\xd3\x8b",
1038
      "\xd3\x8a"         => "\xd3\x89",
1039
      "\xd3\x88"         => "\xd3\x87",
1040
      "\xd3\x86"         => "\xd3\x85",
1041
      "\xd3\x84"         => "\xd3\x83",
1042
      "\xd3\x82"         => "\xd3\x81",
1043
      "\xd2\xbf"         => "\xd2\xbe",
1044
      "\xd2\xbd"         => "\xd2\xbc",
1045
      "\xd2\xbb"         => "\xd2\xba",
1046
      "\xd2\xb9"         => "\xd2\xb8",
1047
      "\xd2\xb7"         => "\xd2\xb6",
1048
      "\xd2\xb5"         => "\xd2\xb4",
1049
      "\xd2\xb3"         => "\xd2\xb2",
1050
      "\xd2\xb1"         => "\xd2\xb0",
1051
      "\xd2\xaf"         => "\xd2\xae",
1052
      "\xd2\xad"         => "\xd2\xac",
1053
      "\xd2\xab"         => "\xd2\xaa",
1054
      "\xd2\xa9"         => "\xd2\xa8",
1055
      "\xd2\xa7"         => "\xd2\xa6",
1056
      "\xd2\xa5"         => "\xd2\xa4",
1057
      "\xd2\xa3"         => "\xd2\xa2",
1058
      "\xd2\xa1"         => "\xd2\xa0",
1059
      "\xd2\x9f"         => "\xd2\x9e",
1060
      "\xd2\x9d"         => "\xd2\x9c",
1061
      "\xd2\x9b"         => "\xd2\x9a",
1062
      "\xd2\x99"         => "\xd2\x98",
1063
      "\xd2\x97"         => "\xd2\x96",
1064
      "\xd2\x95"         => "\xd2\x94",
1065
      "\xd2\x93"         => "\xd2\x92",
1066
      "\xd2\x91"         => "\xd2\x90",
1067
      "\xd2\x8f"         => "\xd2\x8e",
1068
      "\xd2\x8d"         => "\xd2\x8c",
1069
      "\xd2\x8b"         => "\xd2\x8a",
1070
      "\xd2\x81"         => "\xd2\x80",
1071
      "\xd1\xbf"         => "\xd1\xbe",
1072
      "\xd1\xbd"         => "\xd1\xbc",
1073
      "\xd1\xbb"         => "\xd1\xba",
1074
      "\xd1\xb9"         => "\xd1\xb8",
1075
      "\xd1\xb7"         => "\xd1\xb6",
1076
      "\xd1\xb5"         => "\xd1\xb4",
1077
      "\xd1\xb3"         => "\xd1\xb2",
1078
      "\xd1\xb1"         => "\xd1\xb0",
1079
      "\xd1\xaf"         => "\xd1\xae",
1080
      "\xd1\xad"         => "\xd1\xac",
1081
      "\xd1\xab"         => "\xd1\xaa",
1082
      "\xd1\xa9"         => "\xd1\xa8",
1083
      "\xd1\xa7"         => "\xd1\xa6",
1084
      "\xd1\xa5"         => "\xd1\xa4",
1085
      "\xd1\xa3"         => "\xd1\xa2",
1086
      "\xd1\xa1"         => "\xd1\xa0",
1087
      "\xd1\x9f"         => "\xd0\x8f",
1088
      "\xd1\x9e"         => "\xd0\x8e",
1089
      "\xd1\x9d"         => "\xd0\x8d",
1090
      "\xd1\x9c"         => "\xd0\x8c",
1091
      "\xd1\x9b"         => "\xd0\x8b",
1092
      "\xd1\x9a"         => "\xd0\x8a",
1093
      "\xd1\x99"         => "\xd0\x89",
1094
      "\xd1\x98"         => "\xd0\x88",
1095
      "\xd1\x97"         => "\xd0\x87",
1096
      "\xd1\x96"         => "\xd0\x86",
1097
      "\xd1\x95"         => "\xd0\x85",
1098
      "\xd1\x94"         => "\xd0\x84",
1099
      "\xd1\x93"         => "\xd0\x83",
1100
      "\xd1\x92"         => "\xd0\x82",
1101
      "\xd1\x91"         => "\xd0\x81",
1102
      "\xd1\x90"         => "\xd0\x80",
1103
      "\xd1\x8f"         => "\xd0\xaf",
1104
      "\xd1\x8e"         => "\xd0\xae",
1105
      "\xd1\x8d"         => "\xd0\xad",
1106
      "\xd1\x8c"         => "\xd0\xac",
1107
      "\xd1\x8b"         => "\xd0\xab",
1108
      "\xd1\x8a"         => "\xd0\xaa",
1109
      "\xd1\x89"         => "\xd0\xa9",
1110
      "\xd1\x88"         => "\xd0\xa8",
1111
      "\xd1\x87"         => "\xd0\xa7",
1112
      "\xd1\x86"         => "\xd0\xa6",
1113
      "\xd1\x85"         => "\xd0\xa5",
1114
      "\xd1\x84"         => "\xd0\xa4",
1115
      "\xd1\x83"         => "\xd0\xa3",
1116
      "\xd1\x82"         => "\xd0\xa2",
1117
      "\xd1\x81"         => "\xd0\xa1",
1118
      "\xd1\x80"         => "\xd0\xa0",
1119
      "\xd0\xbf"         => "\xd0\x9f",
1120
      "\xd0\xbe"         => "\xd0\x9e",
1121
      "\xd0\xbd"         => "\xd0\x9d",
1122
      "\xd0\xbc"         => "\xd0\x9c",
1123
      "\xd0\xbb"         => "\xd0\x9b",
1124
      "\xd0\xba"         => "\xd0\x9a",
1125
      "\xd0\xb9"         => "\xd0\x99",
1126
      "\xd0\xb8"         => "\xd0\x98",
1127
      "\xd0\xb7"         => "\xd0\x97",
1128
      "\xd0\xb6"         => "\xd0\x96",
1129
      "\xd0\xb5"         => "\xd0\x95",
1130
      "\xd0\xb4"         => "\xd0\x94",
1131
      "\xd0\xb3"         => "\xd0\x93",
1132
      "\xd0\xb2"         => "\xd0\x92",
1133
      "\xd0\xb1"         => "\xd0\x91",
1134
      "\xd0\xb0"         => "\xd0\x90",
1135
      "\xcf\xbb"         => "\xcf\xba",
1136
      "\xcf\xb8"         => "\xcf\xb7",
1137
      "\xcf\xb5"         => "\xce\x95",
1138
      "\xcf\xb2"         => "\xcf\xb9",
1139
      "\xcf\xb1"         => "\xce\xa1",
1140
      "\xcf\xb0"         => "\xce\x9a",
1141
      "\xcf\xaf"         => "\xcf\xae",
1142
      "\xcf\xad"         => "\xcf\xac",
1143
      "\xcf\xab"         => "\xcf\xaa",
1144
      "\xcf\xa9"         => "\xcf\xa8",
1145
      "\xcf\xa7"         => "\xcf\xa6",
1146
      "\xcf\xa5"         => "\xcf\xa4",
1147
      "\xcf\xa3"         => "\xcf\xa2",
1148
      "\xcf\xa1"         => "\xcf\xa0",
1149
      "\xcf\x9f"         => "\xcf\x9e",
1150
      "\xcf\x9d"         => "\xcf\x9c",
1151
      "\xcf\x9b"         => "\xcf\x9a",
1152
      "\xcf\x99"         => "\xcf\x98",
1153
      "\xcf\x97"         => "\xcf\x8f",
1154
      "\xcf\x96"         => "\xce\xa0",
1155
      "\xcf\x95"         => "\xce\xa6",
1156
      "\xcf\x91"         => "\xce\x98",
1157
      "\xcf\x90"         => "\xce\x92",
1158
      "\xcf\x8e"         => "\xce\x8f",
1159
      "\xcf\x8d"         => "\xce\x8e",
1160
      "\xcf\x8c"         => "\xce\x8c",
1161
      "\xcf\x8b"         => "\xce\xab",
1162
      "\xcf\x8a"         => "\xce\xaa",
1163
      "\xcf\x89"         => "\xce\xa9",
1164
      "\xcf\x88"         => "\xce\xa8",
1165
      "\xcf\x87"         => "\xce\xa7",
1166
      "\xcf\x86"         => "\xce\xa6",
1167
      "\xcf\x85"         => "\xce\xa5",
1168
      "\xcf\x84"         => "\xce\xa4",
1169
      "\xcf\x83"         => "\xce\xa3",
1170
      "\xcf\x82"         => "\xce\xa3",
1171
      "\xcf\x81"         => "\xce\xa1",
1172
      "\xcf\x80"         => "\xce\xa0",
1173
      "\xce\xbf"         => "\xce\x9f",
1174
      "\xce\xbe"         => "\xce\x9e",
1175
      "\xce\xbd"         => "\xce\x9d",
1176
      "\xce\xbc"         => "\xce\x9c",
1177
      "\xce\xbb"         => "\xce\x9b",
1178
      "\xce\xba"         => "\xce\x9a",
1179
      "\xce\xb9"         => "\xce\x99",
1180
      "\xce\xb8"         => "\xce\x98",
1181
      "\xce\xb7"         => "\xce\x97",
1182
      "\xce\xb6"         => "\xce\x96",
1183
      "\xce\xb5"         => "\xce\x95",
1184
      "\xce\xb4"         => "\xce\x94",
1185
      "\xce\xb3"         => "\xce\x93",
1186
      "\xce\xb2"         => "\xce\x92",
1187
      "\xce\xb1"         => "\xce\x91",
1188
      "\xce\xaf"         => "\xce\x8a",
1189
      "\xce\xae"         => "\xce\x89",
1190
      "\xce\xad"         => "\xce\x88",
1191
      "\xce\xac"         => "\xce\x86",
1192
      "\xcd\xbd"         => "\xcf\xbf",
1193
      "\xcd\xbc"         => "\xcf\xbe",
1194
      "\xcd\xbb"         => "\xcf\xbd",
1195
      "\xcd\xb7"         => "\xcd\xb6",
1196
      "\xcd\xb3"         => "\xcd\xb2",
1197
      "\xcd\xb1"         => "\xcd\xb0",
1198
      "\xca\x92"         => "\xc6\xb7",
1199
      "\xca\x8c"         => "\xc9\x85",
1200
      "\xca\x8b"         => "\xc6\xb2",
1201
      "\xca\x8a"         => "\xc6\xb1",
1202
      "\xca\x89"         => "\xc9\x84",
1203
      "\xca\x88"         => "\xc6\xae",
1204
      "\xca\x83"         => "\xc6\xa9",
1205
      "\xca\x80"         => "\xc6\xa6",
1206
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1207
      "\xc9\xb5"         => "\xc6\x9f",
1208
      "\xc9\xb2"         => "\xc6\x9d",
1209
      "\xc9\xb1"         => "\xe2\xb1\xae",
1210
      "\xc9\xaf"         => "\xc6\x9c",
1211
      "\xc9\xab"         => "\xe2\xb1\xa2",
1212
      "\xc9\xa9"         => "\xc6\x96",
1213
      "\xc9\xa8"         => "\xc6\x97",
1214
      "\xc9\xa5"         => "\xea\x9e\x8d",
1215
      "\xc9\xa3"         => "\xc6\x94",
1216
      "\xc9\xa0"         => "\xc6\x93",
1217
      "\xc9\x9b"         => "\xc6\x90",
1218
      "\xc9\x99"         => "\xc6\x8f",
1219
      "\xc9\x97"         => "\xc6\x8a",
1220
      "\xc9\x96"         => "\xc6\x89",
1221
      "\xc9\x94"         => "\xc6\x86",
1222
      "\xc9\x93"         => "\xc6\x81",
1223
      "\xc9\x92"         => "\xe2\xb1\xb0",
1224
      "\xc9\x91"         => "\xe2\xb1\xad",
1225
      "\xc9\x90"         => "\xe2\xb1\xaf",
1226
      "\xc9\x8f"         => "\xc9\x8e",
1227
      "\xc9\x8d"         => "\xc9\x8c",
1228
      "\xc9\x8b"         => "\xc9\x8a",
1229
      "\xc9\x89"         => "\xc9\x88",
1230
      "\xc9\x87"         => "\xc9\x86",
1231
      "\xc9\x82"         => "\xc9\x81",
1232
      "\xc9\x80"         => "\xe2\xb1\xbf",
1233
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1234
      "\xc8\xbc"         => "\xc8\xbb",
1235
      "\xc8\xb3"         => "\xc8\xb2",
1236
      "\xc8\xb1"         => "\xc8\xb0",
1237
      "\xc8\xaf"         => "\xc8\xae",
1238
      "\xc8\xad"         => "\xc8\xac",
1239
      "\xc8\xab"         => "\xc8\xaa",
1240
      "\xc8\xa9"         => "\xc8\xa8",
1241
      "\xc8\xa7"         => "\xc8\xa6",
1242
      "\xc8\xa5"         => "\xc8\xa4",
1243
      "\xc8\xa3"         => "\xc8\xa2",
1244
      "\xc8\x9f"         => "\xc8\x9e",
1245
      "\xc8\x9d"         => "\xc8\x9c",
1246
      "\xc8\x9b"         => "\xc8\x9a",
1247
      "\xc8\x99"         => "\xc8\x98",
1248
      "\xc8\x97"         => "\xc8\x96",
1249
      "\xc8\x95"         => "\xc8\x94",
1250
      "\xc8\x93"         => "\xc8\x92",
1251
      "\xc8\x91"         => "\xc8\x90",
1252
      "\xc8\x8f"         => "\xc8\x8e",
1253
      "\xc8\x8d"         => "\xc8\x8c",
1254
      "\xc8\x8b"         => "\xc8\x8a",
1255
      "\xc8\x89"         => "\xc8\x88",
1256
      "\xc8\x87"         => "\xc8\x86",
1257
      "\xc8\x85"         => "\xc8\x84",
1258
      "\xc8\x83"         => "\xc8\x82",
1259
      "\xc8\x81"         => "\xc8\x80",
1260
      "\xc7\xbf"         => "\xc7\xbe",
1261
      "\xc7\xbd"         => "\xc7\xbc",
1262
      "\xc7\xbb"         => "\xc7\xba",
1263
      "\xc7\xb9"         => "\xc7\xb8",
1264
      "\xc7\xb5"         => "\xc7\xb4",
1265
      "\xc7\xb3"         => "\xc7\xb2",
1266
      "\xc7\xaf"         => "\xc7\xae",
1267
      "\xc7\xad"         => "\xc7\xac",
1268
      "\xc7\xab"         => "\xc7\xaa",
1269
      "\xc7\xa9"         => "\xc7\xa8",
1270
      "\xc7\xa7"         => "\xc7\xa6",
1271
      "\xc7\xa5"         => "\xc7\xa4",
1272
      "\xc7\xa3"         => "\xc7\xa2",
1273
      "\xc7\xa1"         => "\xc7\xa0",
1274
      "\xc7\x9f"         => "\xc7\x9e",
1275
      "\xc7\x9d"         => "\xc6\x8e",
1276
      "\xc7\x9c"         => "\xc7\x9b",
1277
      "\xc7\x9a"         => "\xc7\x99",
1278
      "\xc7\x98"         => "\xc7\x97",
1279
      "\xc7\x96"         => "\xc7\x95",
1280
      "\xc7\x94"         => "\xc7\x93",
1281
      "\xc7\x92"         => "\xc7\x91",
1282
      "\xc7\x90"         => "\xc7\x8f",
1283
      "\xc7\x8e"         => "\xc7\x8d",
1284
      "\xc7\x8c"         => "\xc7\x8b",
1285
      "\xc7\x89"         => "\xc7\x88",
1286
      "\xc7\x86"         => "\xc7\x85",
1287
      "\xc6\xbf"         => "\xc7\xb7",
1288
      "\xc6\xbd"         => "\xc6\xbc",
1289
      "\xc6\xb9"         => "\xc6\xb8",
1290
      "\xc6\xb6"         => "\xc6\xb5",
1291
      "\xc6\xb4"         => "\xc6\xb3",
1292
      "\xc6\xb0"         => "\xc6\xaf",
1293
      "\xc6\xad"         => "\xc6\xac",
1294
      "\xc6\xa8"         => "\xc6\xa7",
1295
      "\xc6\xa5"         => "\xc6\xa4",
1296
      "\xc6\xa3"         => "\xc6\xa2",
1297
      "\xc6\xa1"         => "\xc6\xa0",
1298
      "\xc6\x9e"         => "\xc8\xa0",
1299
      "\xc6\x9a"         => "\xc8\xbd",
1300
      "\xc6\x99"         => "\xc6\x98",
1301
      "\xc6\x95"         => "\xc7\xb6",
1302
      "\xc6\x92"         => "\xc6\x91",
1303
      "\xc6\x8c"         => "\xc6\x8b",
1304
      "\xc6\x88"         => "\xc6\x87",
1305
      "\xc6\x85"         => "\xc6\x84",
1306
      "\xc6\x83"         => "\xc6\x82",
1307
      "\xc6\x80"         => "\xc9\x83",
1308
      "\xc5\xbf"         => "\x53",
1309
      "\xc5\xbe"         => "\xc5\xbd",
1310
      "\xc5\xbc"         => "\xc5\xbb",
1311
      "\xc5\xba"         => "\xc5\xb9",
1312
      "\xc5\xb7"         => "\xc5\xb6",
1313
      "\xc5\xb5"         => "\xc5\xb4",
1314
      "\xc5\xb3"         => "\xc5\xb2",
1315
      "\xc5\xb1"         => "\xc5\xb0",
1316
      "\xc5\xaf"         => "\xc5\xae",
1317
      "\xc5\xad"         => "\xc5\xac",
1318
      "\xc5\xab"         => "\xc5\xaa",
1319
      "\xc5\xa9"         => "\xc5\xa8",
1320
      "\xc5\xa7"         => "\xc5\xa6",
1321
      "\xc5\xa5"         => "\xc5\xa4",
1322
      "\xc5\xa3"         => "\xc5\xa2",
1323
      "\xc5\xa1"         => "\xc5\xa0",
1324
      "\xc5\x9f"         => "\xc5\x9e",
1325
      "\xc5\x9d"         => "\xc5\x9c",
1326
      "\xc5\x9b"         => "\xc5\x9a",
1327
      "\xc5\x99"         => "\xc5\x98",
1328
      "\xc5\x97"         => "\xc5\x96",
1329
      "\xc5\x95"         => "\xc5\x94",
1330
      "\xc5\x93"         => "\xc5\x92",
1331
      "\xc5\x91"         => "\xc5\x90",
1332
      "\xc5\x8f"         => "\xc5\x8e",
1333
      "\xc5\x8d"         => "\xc5\x8c",
1334
      "\xc5\x8b"         => "\xc5\x8a",
1335
      "\xc5\x88"         => "\xc5\x87",
1336
      "\xc5\x86"         => "\xc5\x85",
1337
      "\xc5\x84"         => "\xc5\x83",
1338
      "\xc5\x82"         => "\xc5\x81",
1339
      "\xc5\x80"         => "\xc4\xbf",
1340
      "\xc4\xbe"         => "\xc4\xbd",
1341
      "\xc4\xbc"         => "\xc4\xbb",
1342
      "\xc4\xba"         => "\xc4\xb9",
1343
      "\xc4\xb7"         => "\xc4\xb6",
1344
      "\xc4\xb5"         => "\xc4\xb4",
1345
      "\xc4\xb3"         => "\xc4\xb2",
1346
      "\xc4\xb1"         => "\x49",
1347
      "\xc4\xaf"         => "\xc4\xae",
1348
      "\xc4\xad"         => "\xc4\xac",
1349
      "\xc4\xab"         => "\xc4\xaa",
1350
      "\xc4\xa9"         => "\xc4\xa8",
1351
      "\xc4\xa7"         => "\xc4\xa6",
1352
      "\xc4\xa5"         => "\xc4\xa4",
1353
      "\xc4\xa3"         => "\xc4\xa2",
1354
      "\xc4\xa1"         => "\xc4\xa0",
1355
      "\xc4\x9f"         => "\xc4\x9e",
1356
      "\xc4\x9d"         => "\xc4\x9c",
1357
      "\xc4\x9b"         => "\xc4\x9a",
1358
      "\xc4\x99"         => "\xc4\x98",
1359
      "\xc4\x97"         => "\xc4\x96",
1360
      "\xc4\x95"         => "\xc4\x94",
1361
      "\xc4\x93"         => "\xc4\x92",
1362
      "\xc4\x91"         => "\xc4\x90",
1363
      "\xc4\x8f"         => "\xc4\x8e",
1364
      "\xc4\x8d"         => "\xc4\x8c",
1365
      "\xc4\x8b"         => "\xc4\x8a",
1366
      "\xc4\x89"         => "\xc4\x88",
1367
      "\xc4\x87"         => "\xc4\x86",
1368
      "\xc4\x85"         => "\xc4\x84",
1369
      "\xc4\x83"         => "\xc4\x82",
1370
      "\xc4\x81"         => "\xc4\x80",
1371
      "\xc3\xbf"         => "\xc5\xb8",
1372
      "\xc3\xbe"         => "\xc3\x9e",
1373
      "\xc3\xbd"         => "\xc3\x9d",
1374
      "\xc3\xbc"         => "\xc3\x9c",
1375
      "\xc3\xbb"         => "\xc3\x9b",
1376
      "\xc3\xba"         => "\xc3\x9a",
1377
      "\xc3\xb9"         => "\xc3\x99",
1378
      "\xc3\xb8"         => "\xc3\x98",
1379
      "\xc3\xb6"         => "\xc3\x96",
1380
      "\xc3\xb5"         => "\xc3\x95",
1381
      "\xc3\xb4"         => "\xc3\x94",
1382
      "\xc3\xb3"         => "\xc3\x93",
1383
      "\xc3\xb2"         => "\xc3\x92",
1384
      "\xc3\xb1"         => "\xc3\x91",
1385
      "\xc3\xb0"         => "\xc3\x90",
1386
      "\xc3\xaf"         => "\xc3\x8f",
1387
      "\xc3\xae"         => "\xc3\x8e",
1388
      "\xc3\xad"         => "\xc3\x8d",
1389
      "\xc3\xac"         => "\xc3\x8c",
1390
      "\xc3\xab"         => "\xc3\x8b",
1391
      "\xc3\xaa"         => "\xc3\x8a",
1392
      "\xc3\xa9"         => "\xc3\x89",
1393
      "\xc3\xa8"         => "\xc3\x88",
1394
      "\xc3\xa7"         => "\xc3\x87",
1395
      "\xc3\xa6"         => "\xc3\x86",
1396
      "\xc3\xa5"         => "\xc3\x85",
1397
      "\xc3\xa4"         => "\xc3\x84",
1398
      "\xc3\xa3"         => "\xc3\x83",
1399
      "\xc3\xa2"         => "\xc3\x82",
1400
      "\xc3\xa1"         => "\xc3\x81",
1401
      "\xc3\xa0"         => "\xc3\x80",
1402
      "\xc2\xb5"         => "\xce\x9c",
1403
      "\x7a"             => "\x5a",
1404
      "\x79"             => "\x59",
1405
      "\x78"             => "\x58",
1406
      "\x77"             => "\x57",
1407
      "\x76"             => "\x56",
1408
      "\x75"             => "\x55",
1409
      "\x74"             => "\x54",
1410
      "\x73"             => "\x53",
1411
      "\x72"             => "\x52",
1412
      "\x71"             => "\x51",
1413
      "\x70"             => "\x50",
1414
      "\x6f"             => "\x4f",
1415
      "\x6e"             => "\x4e",
1416
      "\x6d"             => "\x4d",
1417
      "\x6c"             => "\x4c",
1418
      "\x6b"             => "\x4b",
1419
      "\x6a"             => "\x4a",
1420
      "\x69"             => "\x49",
1421
      "\x68"             => "\x48",
1422
      "\x67"             => "\x47",
1423
      "\x66"             => "\x46",
1424
      "\x65"             => "\x45",
1425
      "\x64"             => "\x44",
1426
      "\x63"             => "\x43",
1427
      "\x62"             => "\x42",
1428
      "\x61"             => "\x41",
1429
1430
    );
1431
1432
    return $case;
1433
  }
1434
1435
  /**
1436
   * check for UTF8-Support
1437
   */
1438 158
  public static function checkForSupport()
  {
    // The "mbstring" entry doubles as the "already probed" marker; the other
    // flags are filled in together with it below.
    if (isset(self::$support['mbstring'])) {
      return;
    }

    // Probe each capability and cache the outcome in self::$support.
    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1448
1449
  /**
1450
   * Generates a UTF-8 encoded character from the given code point.
1451
   *
1452
   * @param    int $code_point The code point for which to generate a character.
1453
   *
1454
   * @return   string Multi-Byte character, returns empty string on failure to encode.
1455
   */
1456 8
  public static function chr($code_point)
  {
    self::checkForSupport();

    $number = (int)$code_point;

    // Anything that is not already an integer is handed to hex_to_int();
    // a result that casts to 0 is treated as "cannot encode".
    if ($number !== $code_point) {
      $number = (int)self::hex_to_int($code_point);

      if ($number === 0) {
        return '';
      }
    }

    // Build a numeric character reference and let the entity decoder
    // produce the actual character.
    return self::html_entity_decode('&#' . $number . ';', ENT_QUOTES);
  }
1469
1470
  /**
1471
   * Applies callback to all characters of a string.
1472
   *
1473
   * @param    callable $callback The callback function.
1474
   * @param    string $str      UTF-8 string to run callback on.
1475
   *
1476
   * @return   array The outcome of callback.
1477
   */
1478
1479 1
  public static function chr_map($callback, $str)
  {
    // split() breaks $str into pieces; every piece is passed to $callback
    // and the collected return values are handed back to the caller.
    return array_map($callback, self::split($str));
  }
1485
1486
  /**
1487
   * Generates an array of byte length of each character of a Unicode string.
1488
   *
1489
   * 1 byte => U+0000  - U+007F
1490
   * 2 byte => U+0080  - U+07FF
1491
   * 3 byte => U+0800  - U+FFFF
1492
   * 4 byte => U+10000 - U+10FFFF
1493
   *
1494
   * @param    string $str The original Unicode string.
1495
   *
1496
   * @return   array An array of byte lengths of each character.
1497
   */
1498 2
  public static function chr_size_list($str)
  {
    // Short-circuit empty input (null, false, '', ...).
    // The string "0" is falsy in PHP but is a real one-character string,
    // so it must not be treated as empty.
    if (!$str && $str !== '0') {
      return array();
    }

    // split() yields one element per character; strlen() of each element
    // is the number of bytes that character occupies.
    return array_map('strlen', self::split($str));
  }
1506
1507
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the character; the payload
   * bits of the lead byte and of the following bytes are then joined into one integer.
   *
   * @param   string $chr The input character (only the first character is decoded).
   *
   * @return  int The decoded value, 0 for an empty input.
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    // nothing to decode, and reading "$chr[0]" would be an uninitialized string offset
    if (!isset($chr[0])) {
      return 0;
    }

    $code = self::ord($chr[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // a truncated sequence contributes zero bits instead of reading past the end of the string
      $byte = isset($chr[$i - 1]) ? self::ord($chr[$i - 1]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
1546
1547
  /**
   * Returns the hexadecimal code point notation (e.g. U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character.
   * @param    string $pfix The prefix that is passed on to "UTF8::int_to_hex()".
   *
   * @return   string The result of "UTF8::int_to_hex()" for the code point of the character.
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $codePoint = self::ord($chr);

    return self::int_to_hex($codePoint, $pfix);
  }
1559
1560
  /**
   * Cuts a string into chunks and joins them again with the given line ending.
   *
   * INFO: the chunks are glued together with "$end", so it is placed between
   *       the chunks and not appended after the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) that separate the chunks.
   *
   * @return   string The chunked string.
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1574
1575
  /**
   * Accepts a string and removes all non-UTF-8 characters from it.
   *
   * Well-formed 1- to 4-byte sequences are kept, every other byte is dropped.
   * Afterwards the replacement character and invisible characters are removed
   * via "replace_diamond_question_mark()" and "remove_invisible_characters()".
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              Set true, to remove the BOM via "removeBOM()".
   * @param bool   $normalize_whitespace    Set true, to run "normalize_whitespace()".
   * @param bool   $normalize_msword        Set true, to run "normalize_msword()", e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space Set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace).
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // The quadruple-byte alternatives are required, otherwise every valid
    // character above U+FFFF would be removed as "anything else".
    $regx = '/
       (
        (?: [\x00-\x7F]                      # single-byte sequences   0xxxxxxx
        |   [\xC2-\xDF][\x80-\xBF]           # double-byte sequences   110xxxxx 10xxxxxx
        |   \xE0[\xA0-\xBF][\x80-\xBF]       # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xE1-\xEC][\x80-\xBF]{2}
        |   \xED[\x80-\x9F][\x80-\xBF]
        |   [\xEE-\xEF][\x80-\xBF]{2}
        |   \xF0[\x90-\xBF][\x80-\xBF]{2}    # quadruple-byte sequences 11110xxx 10xxxxxx * 3
        |   [\xF1-\xF3][\x80-\xBF]{3}
        |   \xF4[\x80-\x8F][\x80-\xBF]{2}
        ){1,50}                              # ...one or more times
       )
       | .                                   # anything else
       /x';
    $str = preg_replace($regx, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
1622
1623
  /**
   * Cleans up a string, so that only printable UTF-8 chars are left at the end.
   *
   * @param string|false $str
   *
   * @return string The cleaned string, an empty string for an empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    // step 1: fix simple ISO <-> UTF-8 errors
    $fixed = self::fix_simple_utf8($str);

    // step 2: "clean()" is called with
    //   - $remove_bom              = true
    //   - $normalize_whitespace    = true
    //   - $normalize_msword        = false
    //   - $keep_non_breaking_space = true
    // and also removes non UTF-8 symbols, the diamond question mark (�)
    // and invisible characters (e.g. "\0")
    $cleaned = self::clean($fixed, true, true, false, true);

    return (string)$cleaned;
  }
1653
1654
  /**
   * Converts a string (or a list of characters) into a list of Unicode code points.
   *
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool  $u_style If true, every code point is converted via "UTF8::int_to_hex()",
   *                          by default the code points are returned as provided by "UTF8::ord()".
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a string is split into its characters first, arrays are used as they are
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $codePoints = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'ord',
        ),
        $chars
    );

    if (!$u_style) {
      return $codePoints;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'int_to_hex',
        ),
        $codePoints
    );
  }
1689
1690
  /**
   * Counts how often each character is used in a string.
   *
   * INFO: in contrast to the native "count_chars()" there is no "$mode" parameter.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array (sorted by key) with the characters as keys
   *           and their count as values.
   */
  public static function count_chars($str)
  {
    $counts = array_count_values(self::split($str));

    ksort($counts);

    return $counts;
  }
1706
1707
  /**
   * Converts a decimal code into the matching UTF-8 character.
   *
   * The code is written as hexadecimal HTML entity, which is then converted
   * from "HTML-ENTITIES" to "UTF-8" via "mb_convert_encoding()".
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
1724
1725
  /**
   * Encode to UTF8 or LATIN1.
   *
   * INFO:  In contrast to "UTF8::utf8_encode()" this function also tries to fix broken / double encoding,
   *        so you can call it on a string that is already UTF-8 without messing the string up.
   *
   * @param string $encodingLabel ISO-8859-1 || UTF-8
   * @param string $str
   *
   * @return false|string Will return false if the (normalized) encoding label is neither UTF-8 nor ISO-8859-1.
   */
  public static function encode($encodingLabel, $str)
  {
    $target = self::normalizeEncoding($encodingLabel);

    if ('UTF-8' === $target) {
      return self::to_utf8($str);
    }

    return 'ISO-8859-1' === $target ? self::to_latin1($str) : false;
  }
1750
1751
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use the UTF-8 option for binary files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string   $filename      <p>
   *                                Name of the file to read. The value is passed through
   *                                "filter_var()" with FILTER_SANITIZE_STRING before it is used.
   *                                </p>
   * @param int      $flags         [optional] <p>
   *                                Passed on to the native "file_get_contents()", any combination of
   *                                the following flags (with some restrictions), joined with the
   *                                binary OR (|) operator:
   *                                </p>
   *                                <p>
   *                                FILE_USE_INCLUDE_PATH: Search for filename in the include directory.
   *                                See include_path for more information.
   *                                </p>
   *                                <p>
   *                                FILE_TEXT: This flag cannot be used with FILE_BINARY.
   *                                </p>
   *                                <p>
   *                                FILE_BINARY: The file is read in binary mode. This is the default
   *                                setting and cannot be used with FILE_TEXT.
   *                                </p>
   * @param resource $context       [optional] <p>
   *                                A valid context resource created with stream_context_create.
   *                                If it is null and $timeout is not 0, a context with the
   *                                "http" option "timeout" is created.
   *                                </p>
   * @param int      $offset        [optional] <p>
   *                                The offset where the reading starts.
   *                                </p>
   * @param int      $maxlen        [optional] <p>
   *                                Maximum length of data read (only used if it is an integer).
   *                                The default is to read until end of file is reached.
   *                                </p>
   * @param int      $timeout       Timeout for the automatically created "http" stream context.
   *
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non
   *                                default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // the filter fails (false) for values it can not convert into a string,
    // there is no file we could read in this case
    if ($filename === false) {
      return false;
    }

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // convert only if an encoding was detected and it is not UTF-8 already
      $encoding = self::str_detect_encoding($data);
      if ($encoding && $encoding !== 'UTF-8') {
        $data = mb_convert_encoding(
            $data,
            'UTF-8',
            self::normalizeEncoding($encoding)
        );
      }

      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
1880
1881
  /**
   * Checks if a file starts with BOM character.
   *
   * Only the first three bytes of the file are read and handed over to "UTF8::is_bom()".
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (also if the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    // Read from offset 0: since PHP 7.1 a negative offset is counted from the
    // end of the file, so "-1" would return the last byte instead of the first three.
    $firstBytes = file_get_contents($file_path, false, null, 0, 3);

    if ($firstBytes === false) {
      return false;
    }

    return self::is_bom($firstBytes);
  }
1892
1893
  /**
   * Normalizes to UTF-8 NFC, converting from CP-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized, every other type is returned untouched.
   *
   * @param mixed  $var
   * @param int    $normalization_form The normalization form that is passed to the "Normalizer".
   * @param string $leading_combining  Prefix for strings that start with a combining char (\p{Mn}).
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    switch (gettype($var)) {
      case 'array':
        foreach ($var as $key => $value) {
          /** @noinspection AlterInForeachInspection */
          $var[$key] = self::filter($value, $normalization_form, $leading_combining);
        }
        break;

      case 'object':
        foreach ($var as $key => $value) {
          $var->$key = self::filter($value, $normalization_form, $leading_combining);
        }
        break;

      case 'string':
        // Workaround https://bugs.php.net/65732
        if (strpos($var, "\r") !== false) {
          $var = str_replace(array("\r\n", "\r"), "\n", $var);
        }

        // pure 7-bit strings need no normalization at all
        if (!preg_match('/[\x80-\xFF]/', $var)) {
          break;
        }

        if (Normalizer::isNormalized($var, $normalization_form)) {
          // non-empty marker, so that the "isset()" check below still passes
          $normalized = '-';
        } else {
          $normalized = Normalizer::normalize($var, $normalization_form);

          // fall back to "encode()" if the normalizer delivered nothing usable
          $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
        }

        if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
          // Prevent leading combining chars
          // for NFC-safe concatenations.
          $var = $leading_combining . $var;
        }
        break;
    }

    return $var;
  }
1945
1946
  /**
   * "filter_input()"-wrapper, the result is normalized via "UTF8::filter()"
   * (UTF-8 NFC, converting from CP-1252 when needed).
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param mixed  $option Only passed to "filter_input()" if it was really given by the caller.
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $value = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($value);
  }
1966
1967
  /**
   * "filter_input_array()"-wrapper, the result is normalized via "UTF8::filter()"
   * (UTF-8 NFC, converting from CP-1252 when needed).
   *
   * @param int   $type
   * @param mixed $definition Only passed on (together with $add_empty) if at least two arguments were given.
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    $values = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1986
1987
  /**
   * "filter_var()"-wrapper, the result is normalized via "UTF8::filter()"
   * (UTF-8 NFC, converting from CP-1252 when needed).
   *
   * @param mixed $var
   * @param int   $filter
   * @param mixed $option Only passed to "filter_var()" if it was really given by the caller.
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    $value = func_num_args() < 3
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($value);
  }
2006
2007
  /**
   * "filter_var_array()"-wrapper, the result is normalized via "UTF8::filter()"
   * (UTF-8 NFC, converting from CP-1252 when needed).
   *
   * @param array $data
   * @param mixed $definition Only passed on (together with $add_empty) if at least two arguments were given.
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    $values = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
2026
2027
  /**
   * Checks that a string has no more Unicode characters than the given limit.
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The maximum number of chars that is allowed.
   *
   * @return   bool true if the string length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2040
2041
  /**
   * Repairs a broken UTF-8 string with the replacement table "self::$brokenUtf8ToUtf8".
   *
   * @param string $str
   *
   * @return string The repaired string, an empty string for an empty input.
   */
  public static function fix_simple_utf8($str)
  {
    // the search / replace lists are built once and reused by later calls
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2066
2067
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (the keys are kept).
   *
   * @param array|string $str
   *
   * @return array|string The fixed string, or an array of fixed values if an array was given.
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      return array_map(array(__CLASS__, 'fix_utf8'), $str);
    }

    // decode + encode again, until a round does not change the string anymore
    $previous = '';
    while ($previous != $str) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous));
    }

    return $str;
  }
2094
2095
  /**
   * Get the text direction of a specific character.
   *
   * The character is converted via "chr_to_decimal()" and looked up
   * in a list of right-to-left code point ranges.
   *
   * @param   string $chr Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($chr)
  {
    // inclusive (first, last) code point ranges of the right-to-left characters,
    // a single code point is written as a range with identical borders
    static $rtlRanges = array(
        array(0x5be, 0x5be),
        array(0x5c0, 0x5c0),
        array(0x5c3, 0x5c3),
        array(0x5c6, 0x5c6),
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x608, 0x608),
        array(0x60b, 0x60b),
        array(0x60d, 0x60d),
        array(0x61b, 0x61b),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x710, 0x710),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7b1, 0x7b1),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x7fa, 0x7fa),
        array(0x800, 0x815),
        array(0x81a, 0x81a),
        array(0x824, 0x824),
        array(0x828, 0x828),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0x85e, 0x85e),
        array(0x200f, 0x200f),
        array(0xfb1d, 0xfb1d),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb3e, 0xfb3e),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x10808, 0x10808),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083c, 0x1083c),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x1093f, 0x1093f),
        array(0x10a00, 0x10a00),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    $c = static::chr_to_decimal($chr);

    // fast exit: everything outside of the outer borders is left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    foreach ($rtlRanges as $range) {
      list($first, $last) = $range;

      if ($first === $last) {
        // single code points are compared strictly
        if ($first === $c) {
          return 'RTL';
        }
      } elseif ($first <= $c && $last >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2194
2195
  /**
   * Loads and unserializes a data file from "/data/*.ser" (relative to this file).
   *
   * @param string $file The file name without directory and without the ".ser" extension.
   *
   * @return bool|string|array|int The unserialized content, false if the file does not exist.
   */
  protected static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.ser';

    if (!file_exists($path)) {
      return false;
    }

    return unserialize(file_get_contents($path));
  }
2211
2212
  /**
   * Creates a random string of UTF-8 characters.
   *
   * INFO: the characters are picked with "mt_rand()", so the result
   *       is not suitable for anything security related.
   *
   * @param    int $len The length of string in characters.
   *
   * @return   string String consisting of random characters, an empty string if $len is not positive.
   */
  public static function hash($len = 8)
  {
    // the alphabet is built once and reused by later calls
    static $chars = array();
    static $chars_len = null;

    // a fractional length would never reach exactly zero in the countdown below
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (!$chars) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // keep numbers + upper / lower case letters only, everything else becomes an empty string
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // remove the emptied entries: filter by length, because a plain
        // "array_filter()" would also drop the (falsy) digit "0"
        $chars = array_values(array_filter($chars, 'strlen'));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for (; $len > 0; --$len) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
2259
2260
  /**
   * Converts hexadecimal U+xxxx code point representation to Integer.
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * Accepted are 4 to 6 hexadecimal digits with an optional "\u" or "U+" prefix.
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // only real hexadecimal digits are allowed: "intval()" stops silently at the first
    // non-hex char, so e.g. "12gz" would otherwise be converted into 0x12
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2277
2278
  /**
2279
   * Converts a UTF-8 string to a series of HTML numbered entities.
2280
   *
2281
   * e.g.: &#123;&#39;&#1740;
2282
   *
2283
   * @param  string $str The Unicode string to be encoded as numbered entities.
2284
   *
2285
   * @return string HTML numbered entities.
2286
   */
2287 1
  public static function html_encode($str)
  {
    // pieces of the input as produced by self::split()
    $pieces = self::split($str);

    // every piece is run through self::single_chr_html_encode()
    $encoder = array(
        '\\voku\\helper\\UTF8',
        'single_chr_html_encode',
    );
    $entities = array_map($encoder, $pieces);

    // glue the encoded pieces back together without a separator
    return implode($entities);
  }
2299
2300
  /**
2301
   * UTF-8 version of html_entity_decode()
2302
   *
2303
   * The reason we are not using html_entity_decode() by itself is because
2304
   * while it is not technically correct to leave out the semicolon
2305
   * at the end of an entity most browsers will still interpret the entity
2306
   * correctly. html_entity_decode() does not convert entities without
2307
   * semicolons, so we are left with our own little solution here. Bummer.
2308
   *
2309
   * Convert all HTML entities to their applicable characters
2310
   *
2311
   * @link http://php.net/manual/en/function.html-entity-decode.php
2312
   *
2313
   * @param string $str      <p>
2314
   *                         The input string.
2315
   *                         </p>
2316
   * @param int    $flags    [optional] <p>
2317
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2318
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2319
   *                         <table>
2320
   *                         Available <i>flags</i> constants
2321
   *                         <tr valign="top">
2322
   *                         <td>Constant Name</td>
2323
   *                         <td>Description</td>
2324
   *                         </tr>
2325
   *                         <tr valign="top">
2326
   *                         <td><b>ENT_COMPAT</b></td>
2327
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2328
   *                         </tr>
2329
   *                         <tr valign="top">
2330
   *                         <td><b>ENT_QUOTES</b></td>
2331
   *                         <td>Will convert both double and single quotes.</td>
2332
   *                         </tr>
2333
   *                         <tr valign="top">
2334
   *                         <td><b>ENT_NOQUOTES</b></td>
2335
   *                         <td>Will leave both double and single quotes unconverted.</td>
2336
   *                         </tr>
2337
   *                         <tr valign="top">
2338
   *                         <td><b>ENT_HTML401</b></td>
2339
   *                         <td>
2340
   *                         Handle code as HTML 4.01.
2341
   *                         </td>
2342
   *                         </tr>
2343
   *                         <tr valign="top">
2344
   *                         <td><b>ENT_XML1</b></td>
2345
   *                         <td>
2346
   *                         Handle code as XML 1.
2347
   *                         </td>
2348
   *                         </tr>
2349
   *                         <tr valign="top">
2350
   *                         <td><b>ENT_XHTML</b></td>
2351
   *                         <td>
2352
   *                         Handle code as XHTML.
2353
   *                         </td>
2354
   *                         </tr>
2355
   *                         <tr valign="top">
2356
   *                         <td><b>ENT_HTML5</b></td>
2357
   *                         <td>
2358
   *                         Handle code as HTML 5.
2359
   *                         </td>
2360
   *                         </tr>
2361
   *                         </table>
2362
   *                         </p>
2363
   * @param string $encoding [optional] <p>
2364
   *                         Encoding to use.
2365
   *                         </p>
2366
   *
2367
   * @return string the decoded string.
2368
   */
2369 15
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // empty input: nothing to decode
    if (!isset($str[0])) {
      return '';
    }

    // without an ampersand there cannot be any entity in the string
    if (strpos($str, '&') === false) {
      return $str;
    }

    // default flags: ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4
    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    $numericEntityCallback = array('\voku\helper\UTF8', 'entityCallback');

    // repeat both decoding passes until a full round leaves the string
    // untouched, so nested / double-encoded entities get resolved as well
    do {
      $previous = $str;

      // pass 1: decimal numeric entities via self::entityCallback()
      $str = preg_replace_callback("/&#\d{2,5};/", $numericEntityCallback, $str);

      // pass 2: append the missing semicolon to numeric entities written
      // without one, then let the native decoder handle whatever is left
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2405
2406
  /**
2407
   * Callback function for preg_replace_callback use.
2408
   *
2409
   * @param  array $matches PREG matches
2410
   *
2411
   * @return string
2412
   */
2413 12
  protected static function entityCallback($matches)
  {
    self::checkForSupport();

    // $matches[0] is the complete matched entity, e.g. "&#1740;"
    $decoded = mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    // A decoded apostrophe is handed back as a hex entity rather than as the
    // raw character (presumably so the quote flags of the caller's native
    // html_entity_decode() call still decide its fate -- confirm).
    return ($decoded === "'") ? '&#x27;' : $decoded;
  }
2425
2426
  /**
2427
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2428
   *
2429
   * @link http://php.net/manual/en/function.htmlentities.php
2430
   *
2431
   * @param string $str           <p>
2432
   *                              The input string.
2433
   *                              </p>
2434
   * @param int    $flags         [optional] <p>
2435
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2436
   *                              invalid code unit sequences and the used document type. The default is
2437
   *                              ENT_COMPAT | ENT_HTML401.
2438
   *                              <table>
2439
   *                              Available <i>flags</i> constants
2440
   *                              <tr valign="top">
2441
   *                              <td>Constant Name</td>
2442
   *                              <td>Description</td>
2443
   *                              </tr>
2444
   *                              <tr valign="top">
2445
   *                              <td><b>ENT_COMPAT</b></td>
2446
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2447
   *                              </tr>
2448
   *                              <tr valign="top">
2449
   *                              <td><b>ENT_QUOTES</b></td>
2450
   *                              <td>Will convert both double and single quotes.</td>
2451
   *                              </tr>
2452
   *                              <tr valign="top">
2453
   *                              <td><b>ENT_NOQUOTES</b></td>
2454
   *                              <td>Will leave both double and single quotes unconverted.</td>
2455
   *                              </tr>
2456
   *                              <tr valign="top">
2457
   *                              <td><b>ENT_IGNORE</b></td>
2458
   *                              <td>
2459
   *                              Silently discard invalid code unit sequences instead of returning
2460
   *                              an empty string. Using this flag is discouraged as it
2461
   *                              may have security implications.
2462
   *                              </td>
2463
   *                              </tr>
2464
   *                              <tr valign="top">
2465
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2466
   *                              <td>
2467
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2468
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2469
   *                              </td>
2470
   *                              </tr>
2471
   *                              <tr valign="top">
2472
   *                              <td><b>ENT_DISALLOWED</b></td>
2473
   *                              <td>
2474
   *                              Replace invalid code points for the given document type with a
2475
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2476
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2477
   *                              instance, to ensure the well-formedness of XML documents with
2478
   *                              embedded external content.
2479
   *                              </td>
2480
   *                              </tr>
2481
   *                              <tr valign="top">
2482
   *                              <td><b>ENT_HTML401</b></td>
2483
   *                              <td>
2484
   *                              Handle code as HTML 4.01.
2485
   *                              </td>
2486
   *                              </tr>
2487
   *                              <tr valign="top">
2488
   *                              <td><b>ENT_XML1</b></td>
2489
   *                              <td>
2490
   *                              Handle code as XML 1.
2491
   *                              </td>
2492
   *                              </tr>
2493
   *                              <tr valign="top">
2494
   *                              <td><b>ENT_XHTML</b></td>
2495
   *                              <td>
2496
   *                              Handle code as XHTML.
2497
   *                              </td>
2498
   *                              </tr>
2499
   *                              <tr valign="top">
2500
   *                              <td><b>ENT_HTML5</b></td>
2501
   *                              <td>
2502
   *                              Handle code as HTML 5.
2503
   *                              </td>
2504
   *                              </tr>
2505
   *                              </table>
2506
   *                              </p>
2507
   * @param string $encoding      [optional] <p>
2508
   *                              Like <b>htmlspecialchars</b>,
2509
   *                              <b>htmlentities</b> takes an optional third argument
2510
   *                              <i>encoding</i> which defines encoding used in
2511
   *                              conversion.
2512
   *                              Although this argument is technically optional, you are highly
2513
   *                              encouraged to specify the correct value for your code.
2514
   *                              </p>
2515
   * @param bool   $double_encode [optional] <p>
2516
   *                              When <i>double_encode</i> is turned off PHP will not
2517
   *                              encode existing html entities. The default is to convert everything.
2518
   *                              </p>
2519
   *
2520
   *
2521
   * @return string the encoded string.
2522
   * </p>
2523
   * <p>
2524
   * If the input <i>string</i> contains an invalid code unit
2525
   * sequence within the given <i>encoding</i> an empty string
2526
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2527
   * <b>ENT_SUBSTITUTE</b> flags are set.
2528
   */
2529 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // straight delegation to the native function; the only difference to
    // calling it directly is the 'UTF-8' default for $encoding
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
2533
2534
  /**
2535
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
2536
   *
2537
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2538
   *
2539
   * @param string $str           <p>
2540
   *                              The string being converted.
2541
   *                              </p>
2542
   * @param int    $flags         [optional] <p>
2543
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2544
   *                              invalid code unit sequences and the used document type. The default is
2545
   *                              ENT_COMPAT | ENT_HTML401.
2546
   *                              <table>
2547
   *                              Available <i>flags</i> constants
2548
   *                              <tr valign="top">
2549
   *                              <td>Constant Name</td>
2550
   *                              <td>Description</td>
2551
   *                              </tr>
2552
   *                              <tr valign="top">
2553
   *                              <td><b>ENT_COMPAT</b></td>
2554
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2555
   *                              </tr>
2556
   *                              <tr valign="top">
2557
   *                              <td><b>ENT_QUOTES</b></td>
2558
   *                              <td>Will convert both double and single quotes.</td>
2559
   *                              </tr>
2560
   *                              <tr valign="top">
2561
   *                              <td><b>ENT_NOQUOTES</b></td>
2562
   *                              <td>Will leave both double and single quotes unconverted.</td>
2563
   *                              </tr>
2564
   *                              <tr valign="top">
2565
   *                              <td><b>ENT_IGNORE</b></td>
2566
   *                              <td>
2567
   *                              Silently discard invalid code unit sequences instead of returning
2568
   *                              an empty string. Using this flag is discouraged as it
2569
   *                              may have security implications.
2570
   *                              </td>
2571
   *                              </tr>
2572
   *                              <tr valign="top">
2573
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2574
   *                              <td>
2575
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2576
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2577
   *                              </td>
2578
   *                              </tr>
2579
   *                              <tr valign="top">
2580
   *                              <td><b>ENT_DISALLOWED</b></td>
2581
   *                              <td>
2582
   *                              Replace invalid code points for the given document type with a
2583
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2584
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2585
   *                              instance, to ensure the well-formedness of XML documents with
2586
   *                              embedded external content.
2587
   *                              </td>
2588
   *                              </tr>
2589
   *                              <tr valign="top">
2590
   *                              <td><b>ENT_HTML401</b></td>
2591
   *                              <td>
2592
   *                              Handle code as HTML 4.01.
2593
   *                              </td>
2594
   *                              </tr>
2595
   *                              <tr valign="top">
2596
   *                              <td><b>ENT_XML1</b></td>
2597
   *                              <td>
2598
   *                              Handle code as XML 1.
2599
   *                              </td>
2600
   *                              </tr>
2601
   *                              <tr valign="top">
2602
   *                              <td><b>ENT_XHTML</b></td>
2603
   *                              <td>
2604
   *                              Handle code as XHTML.
2605
   *                              </td>
2606
   *                              </tr>
2607
   *                              <tr valign="top">
2608
   *                              <td><b>ENT_HTML5</b></td>
2609
   *                              <td>
2610
   *                              Handle code as HTML 5.
2611
   *                              </td>
2612
   *                              </tr>
2613
   *                              </table>
2614
   *                              </p>
2615
   * @param string $encoding      [optional] <p>
2616
   *                              Defines encoding used in conversion.
2617
   *                              </p>
2618
   *                              <p>
2619
   *                              For the purposes of this function, the encodings
2620
   *                              ISO-8859-1, ISO-8859-15,
2621
   *                              UTF-8, cp866,
2622
   *                              cp1251, cp1252, and
2623
   *                              KOI8-R are effectively equivalent, provided the
2624
   *                              <i>string</i> itself is valid for the encoding, as
2625
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2626
   *                              the same positions in all of these encodings.
2627
   *                              </p>
2628
   * @param bool   $double_encode [optional] <p>
2629
   *                              When <i>double_encode</i> is turned off PHP will not
2630
   *                              encode existing html entities, the default is to convert everything.
2631
   *                              </p>
2632
   *
2633
   * @return string The converted string.
2634
   * </p>
2635
   * <p>
2636
   * If the input <i>string</i> contains an invalid code unit
2637
   * sequence within the given <i>encoding</i> an empty string
2638
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2639
   * <b>ENT_SUBSTITUTE</b> flags are set.
2640
   */
2641 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // straight delegation to the native function; the only difference to
    // calling it directly is the 'UTF-8' default for $encoding
    $converted = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2645
2646
  /**
2647
   * checks whether iconv is available on the server
2648
   *
2649
   * @return   bool True if available, False otherwise
2650
   */
2651 1
  public static function iconv_loaded()
  {
    // ask PHP whether the "iconv" extension is loaded
    if (extension_loaded('iconv')) {
      return true;
    }

    return false;
  }
2655
2656
  /**
2657
   * Converts Integer to hexadecimal U+xxxx code point representation.
2658
   *
2659
   * @param    int    $int The integer to be converted to hexadecimal code point.
2660
   * @param    string $pfix
2661
   *
2662
   * @return   string The code point, or empty string on failure.
2663
   */
2664
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // anything that is not a plain run of decimal digits counts as failure
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // hexadecimal form, left-padded with zeros to at least four digits
    $hex = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $hex;
  }
2676
2677
  /**
2678
   * checks whether intl is available on the server
2679
   *
2680
   * @return   bool True if available, False otherwise
2681
   */
2682 1
  public static function intl_loaded()
  {
    // ask PHP whether the "intl" extension is loaded
    if (extension_loaded('intl')) {
      return true;
    }

    return false;
  }
2686
2687
  /**
2688
   * alias for "UTF8::is_ascii()"
2689
   *
2690
   * @param string $str
2691
   *
2692
   * @return boolean
2693
   */
2694 1
  public static function isAscii($str)
  {
    // camelCase alias: forwards unchanged to is_ascii()
    $result = self::is_ascii($str);

    return $result;
  }
2698
2699
  /**
2700
   * alias for "UTF8::is_base64"
2701
   *
2702
   * @param string $str
2703
   *
2704
   * @return bool
2705
   */
2706 1
  public static function isBase64($str)
  {
    // camelCase alias: forwards unchanged to is_base64()
    $result = self::is_base64($str);

    return $result;
  }
2710
2711
  /**
2712
   * alias for "UTF8::is_bom"
2713
   *
2714
   * @param string $utf8_chr
2715
   *
2716
   * @return boolean
2717
   */
2718
  public static function isBom($utf8_chr)
  {
    // camelCase alias: forwards unchanged to is_bom()
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2722
2723
  /**
2724
   * Try to check if a string is a json-string...
2725
   *
2726
   * @param $str
2727
   *
2728
   * @return bool
2729
   *
2730
   * @deprecated
2731
   */
2732
  public static function isJson($str)
  {
    $str = (string)$str;

    // an empty string is never JSON
    if (!isset($str[0])) {
      return false;
    }

    $decoded = json_decode($str);

    // Both JSON containers count: an object ("{...}") decodes to a PHP object,
    // a list ("[...]") decodes to a PHP array. Checking is_object() alone
    // rejected perfectly valid JSON arrays.
    // Bare scalars ("123", "true", '"text"') are still reported as non-JSON.
    $isContainer = is_object($decoded) || is_array($decoded);

    return $isContainer && json_last_error() === JSON_ERROR_NONE;
  }
2750
2751
  /**
2752
   * alias for "UTF8::is_utf8"
2753
   *
2754
   * @param string $str
2755
   *
2756
   * @return bool
2757
   */
2758 16
  public static function isUtf8($str)
  {
    // camelCase alias: forwards unchanged to is_utf8()
    $result = self::is_utf8($str);

    return $result;
  }
2762
2763
  /**
2764
   * Checks if a string is 7 bit ASCII.
2765
   *
2766
   * @param    string $str The string to check.
2767
   *
2768
   * @return   bool <strong>true</strong> if it is ASCII<br />
2769
   *                <strong>false</strong> otherwise
2770
   */
2771 1
  public static function is_ascii($str)
  {
    // search for at least one byte with the high bit set (0x80 - 0xFF)
    $highByteFound = preg_match('/[\x80-\xFF]/', $str);

    // no such byte => pure 7 bit ASCII
    return !$highByteFound;
  }
2775
2776
  /**
2777
   * Returns true if the string is base64 encoded, false otherwise.
2778
   *
2779
   * @param string $str
2780
   *
2781
   * @return bool Whether or not $str is base64 encoded
2782
   */
2783 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    // the empty string is not treated as base64
    if ($str === '') {
      return false;
    }

    // strict decode followed by re-encode: only input that survives the
    // round trip byte for byte is accepted
    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
2797
2798
  /**
2799
   * Check if the input is binary... (this looks like a hack)
2800
   *
2801
   * @param string $input
2802
   *
2803
   * @return bool
2804
   */
2805 3
  public static function is_binary($input)
  {
    $testLength = strlen($input);

    // a string made up solely of the characters "0" and "1"
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // at least one NUL byte
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    // NOTE(review): substr_count() looks for the literal four-character
    // substring '^ -~' here, it is not evaluated as a character class. This
    // looks like it was meant to be a "share of non-printable bytes" check.
    // Left as is, because is_utf16() and is_utf32() gate on this result --
    // confirm the intent before changing it.
    if ($testLength && substr_count($input, '^ -~') / $testLength > 0.3) {
      return true;
    }

    return false;
  }
2822
2823
  /**
2824
   * Check if the file is binary.
2825
   *
2826
   * @param string $file
2827
   *
2828
   * @return boolean
2829
   */
2830
  public static function is_binary_file($file)
  {
    // Fallback when the file cannot be opened or read: an empty block,
    // which is then simply handed to is_binary().
    $block = '';

    try {
      // 'rb': read the raw bytes, without any text-mode translation
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, it does not throw by
      // itself; never pass that false on to fread() / fclose()
      if ($fp !== false) {
        // only the first 512 bytes are inspected
        $data = fread($fp, 512);
        fclose($fp);

        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // e.g. an error handler that converts warnings into exceptions
      $block = '';
    }

    return self::is_binary($block);
  }
2842
2843
  /**
2844
   * Checks if the given string is exactly "UTF8 - Byte Order Mark".
2845
   *
2846
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2847
   *
2848
   * @param    string $utf8_chr The input string.
2849
   *
2850
   * @return   bool True if the $utf8_chr is Byte Order Mark, False otherwise.
2851
   */
2852 2
  public static function is_bom($utf8_chr)
  {
    // strict comparison against the value returned by self::bom()
    $bom = self::bom();

    return $utf8_chr === $bom;
  }
2856
2857
  /**
2858
   * Check if the string is UTF-16.
2859
   *
2860
   * @param string $str
2861
   *
2862
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2863
   */
2864 1 View Code Duplication
  public static function is_utf16($str)
  {
    // only input that is_binary() flags is examined at all
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // one score per byte order; little-endian is evaluated first
    $score = array(
        'UTF-16LE' => 0,
        'UTF-16BE' => 0,
    );

    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $decoded = mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      // the decoded text has to survive a round trip through the candidate
      // encoding (loose comparison, as before)
      $reEncoded = mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $roundTrip = mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
      if ($roundTrip != $decoded) {
        continue;
      }

      // one point for every key of count_chars($roundTrip) that is strictly
      // found among the values of count_chars($str)
      $strChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    // a tie (including 0 : 0) means: no decision
    if ($score['UTF-16BE'] == $score['UTF-16LE']) {
      return false;
    }

    return ($score['UTF-16LE'] > $score['UTF-16BE']) ? 1 : 2;
  }
2911
2912
  /**
2913
   * Check if the string is UTF-32.
2914
   *
2915
   * @param string $str
2916
   *
2917
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2918
   */
2919 1 View Code Duplication
  public static function is_utf32($str)
  {
    // only input that is_binary() flags is examined at all
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // one score per byte order; little-endian is evaluated first
    $score = array(
        'UTF-32LE' => 0,
        'UTF-32BE' => 0,
    );

    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $decoded = mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      // the decoded text has to survive a round trip through the candidate
      // encoding (loose comparison, as before)
      $reEncoded = mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $roundTrip = mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
      if ($roundTrip != $decoded) {
        continue;
      }

      // one point for every key of count_chars($roundTrip) that is strictly
      // found among the values of count_chars($str)
      $strChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    // a tie (including 0 : 0) means: no decision
    if ($score['UTF-32BE'] == $score['UTF-32LE']) {
      return false;
    }

    return ($score['UTF-32LE'] > $score['UTF-32BE']) ? 1 : 2;
  }
2966
2967
  /**
2968
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2969
   *
2970
   * @see    http://hsivonen.iki.fi/php-utf8/
2971
   *
2972
   * @param    string $str The string to be checked.
2973
   *
2974
   * @return   bool
2975
   */
2976 31
  public static function is_utf8($str)
2977
  {
2978 31
    $str = (string)$str;
2979
2980 31
    if (!isset($str[0])) {
2981 3
      return true;
2982
    }
2983
2984 29
    if (self::pcre_utf8_support() !== true) {
2985
2986
      // If even just the first character can be matched, when the /u
2987
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
2988
      // invalid, nothing at all will match, even if the string contains
2989
      // some valid sequences
2990
      return (preg_match('/^.{1}/us', $str, $ar) == 1);
2991
2992
    } else {
2993
2994 29
      $mState = 0; // cached expected number of octets after the current octet
2995
      // until the beginning of the next UTF8 character sequence
2996 29
      $mUcs4 = 0; // cached Unicode character
2997 29
      $mBytes = 1; // cached expected number of octets in the current sequence
2998 29
      $len = strlen($str);
2999
3000
      /** @noinspection ForeachInvariantsInspection */
3001 29
      for ($i = 0; $i < $len; $i++) {
3002 29
        $in = ord($str[$i]);
3003 29
        if ($mState == 0) {
3004
          // When mState is zero we expect either a US-ASCII character or a
3005
          // multi-octet sequence.
3006 29
          if (0 == (0x80 & $in)) {
3007
            // US-ASCII, pass straight through.
3008 27
            $mBytes = 1;
3009 29 View Code Duplication
          } elseif (0xC0 == (0xE0 & $in)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
3010
            // First octet of 2 octet sequence.
3011 25
            $mUcs4 = $in;
3012 25
            $mUcs4 = ($mUcs4 & 0x1F) << 6;
3013 25
            $mState = 1;
3014 25
            $mBytes = 2;
3015 27
          } elseif (0xE0 == (0xF0 & $in)) {
3016
            // First octet of 3 octet sequence.
3017 11
            $mUcs4 = $in;
3018 11
            $mUcs4 = ($mUcs4 & 0x0F) << 12;
3019 11
            $mState = 2;
3020 11
            $mBytes = 3;
3021 21 View Code Duplication
          } elseif (0xF0 == (0xF8 & $in)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
3022
            // First octet of 4 octet sequence.
3023 5
            $mUcs4 = $in;
3024 5
            $mUcs4 = ($mUcs4 & 0x07) << 18;
3025 5
            $mState = 3;
3026 5
            $mBytes = 4;
3027 11
          } elseif (0xF8 == (0xFC & $in)) {
3028
            /* First octet of 5 octet sequence.
3029
            *
3030
            * This is illegal because the encoded codepoint must be either
3031
            * (a) not the shortest form or
3032
            * (b) outside the Unicode range of 0-0x10FFFF.
3033
            * Rather than trying to resynchronize, we will carry on until the end
3034
            * of the sequence and let the later error handling code catch it.
3035
            */
3036 3
            $mUcs4 = $in;
3037 3
            $mUcs4 = ($mUcs4 & 0x03) << 24;
3038 3
            $mState = 4;
3039 3
            $mBytes = 5;
3040 7 View Code Duplication
          } elseif (0xFC == (0xFE & $in)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
3041
            // First octet of 6 octet sequence, see comments for 5 octet sequence.
3042 3
            $mUcs4 = $in;
3043 3
            $mUcs4 = ($mUcs4 & 1) << 30;
3044 3
            $mState = 5;
3045 3
            $mBytes = 6;
3046 3
          } else {
3047
            /* Current octet is neither in the US-ASCII range nor a legal first
3048
             * octet of a multi-octet sequence.
3049
             */
3050 3
            return false;
3051
          }
3052 29
        } else {
3053
          // When mState is non-zero, we expect a continuation of the multi-octet
3054
          // sequence
3055 27
          if (0x80 == (0xC0 & $in)) {
3056
            // Legal continuation.
3057 25
            $shift = ($mState - 1) * 6;
3058 25
            $tmp = $in;
3059 25
            $tmp = ($tmp & 0x0000003F) << $shift;
3060 25
            $mUcs4 |= $tmp;
3061
            /**
3062
             * End of the multi-octet sequence. mUcs4 now contains the final
3063
             * Unicode code point to be output
3064
             */
3065 25
            if (0 == --$mState) {
3066
              /*
3067
              * Check for illegal sequences and code points.
3068
              */
3069
              // From Unicode 3.1, non-shortest form is illegal
3070
              if (
3071 25
                  ((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
3072 25
                  ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
3073 25
                  ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
3074 25
                  (4 < $mBytes) ||
3075
                  // From Unicode 3.2, surrogate characters are illegal.
3076 25
                  (($mUcs4 & 0xFFFFF800) == 0xD800) ||
3077
                  // Code points outside the Unicode range are illegal.
3078 25
                  ($mUcs4 > 0x10FFFF)
3079 25
              ) {
3080 5
                return false;
3081
              }
3082
              // initialize UTF8 cache
3083 25
              $mState = 0;
3084 25
              $mUcs4 = 0;
3085 25
              $mBytes = 1;
3086 25
            }
3087 25
          } else {
3088
            /**
3089
             *((0xC0 & (*in) != 0x80) && (mState != 0))
3090
             * Incomplete multi-octet sequence.
3091
             */
3092 13
            return false;
3093
          }
3094
        }
3095 29
      }
3096
3097 11
      return true;
3098
    }
3099
  }
3100
3101
  /**
3102
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3103
   * Decodes a JSON string
3104
   *
3105
   * @link http://php.net/manual/en/function.json-decode.php
3106
   *
3107
   * @param string $json    <p>
3108
   *                        The <i>json</i> string being decoded.
3109
   *                        </p>
3110
   *                        <p>
3111
   *                        This function only works with UTF-8 encoded strings.
3112
   *                        </p>
3113
   *                        <p>PHP implements a superset of
3114
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3115
   *                        only supports these values when they are nested inside an array or an object.
3116
   *                        </p>
3117
   * @param bool   $assoc   [optional] <p>
3118
   *                        When <b>TRUE</b>, returned objects will be converted into
3119
   *                        associative arrays.
3120
   *                        </p>
3121
   * @param int    $depth   [optional] <p>
3122
   *                        User specified recursion depth.
3123
   *                        </p>
3124
   * @param int    $options [optional] <p>
3125
   *                        Bitmask of JSON decode options. Currently only
3126
   *                        <b>JSON_BIGINT_AS_STRING</b>
3127
   *                        is supported (default is to cast large integers as floats)
3128
   *                        </p>
3129
   *
3130
   * @return mixed the value encoded in <i>json</i> in appropriate
3131
   * PHP type. Values true, false and
3132
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3133
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3134
   * <i>json</i> cannot be decoded or if the encoded
3135
   * data is deeper than the recursion limit.
3136
   */
3137 2
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input always goes through self::filter() before the native decoder sees it.
    $filtered = self::filter($json);

    // "$options" is only forwarded when Bootup::is_php('5.4') reports strict true;
    // otherwise the three-argument form of json_decode() is used.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3149
3150
  /**
3151
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3152
   * Returns the JSON representation of a value
3153
   *
3154
   * @link http://php.net/manual/en/function.json-encode.php
3155
   *
3156
   * @param mixed $value   <p>
3157
   *                       The <i>value</i> being encoded. Can be any type except
3158
   *                       a resource.
3159
   *                       </p>
3160
   *                       <p>
3161
   *                       All string data must be UTF-8 encoded.
3162
   *                       </p>
3163
   *                       <p>PHP implements a superset of
3164
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3165
   *                       only supports these values when they are nested inside an array or an object.
3166
   *                       </p>
3167
   * @param int   $options [optional] <p>
3168
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3169
   *                       <b>JSON_HEX_TAG</b>,
3170
   *                       <b>JSON_HEX_AMP</b>,
3171
   *                       <b>JSON_HEX_APOS</b>,
3172
   *                       <b>JSON_NUMERIC_CHECK</b>,
3173
   *                       <b>JSON_PRETTY_PRINT</b>,
3174
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3175
   *                       <b>JSON_FORCE_OBJECT</b>,
3176
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3177
   *                       constants is described on
3178
   *                       the JSON constants page.
3179
   *                       </p>
3180
   * @param int   $depth   [optional] <p>
3181
   *                       Set the maximum depth. Must be greater than zero.
3182
   *                       </p>
3183
   *
3184
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3185
   */
3186 1
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value always goes through self::filter() before it is encoded.
    $filtered = self::filter($value);

    // "$depth" is only forwarded when Bootup::is_php('5.5') is truthy;
    // otherwise the two-argument form of json_encode() is used.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3198
3199
  /**
3200
   * Makes string's first char lowercase.
3201
   *
3202
   * @param    string $str The input string
3203
   *
3204
   * @return   string The resulting string
3205
   */
3206 6
  public static function lcfirst($str)
  {
    // Lower-case only the first character ...
    $head = self::strtolower(self::substr($str, 0, 1));

    // ... and append everything after it unchanged.
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
3210
3211
  /**
3212
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3213
   *
3214
   * WARNING: This is much slower then "ltrim()" !!!!
3215
   *
3216
   * @param    string $str   The string to be trimmed
3217
   * @param    string $chars Optional characters to be stripped
3218
   *
3219
   * @return   string The string with unwanted characters stripped from the left
3220
   */
3221 24 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $text = (string)$str;

    // Nothing to trim in an empty string.
    if ($text === '') {
      return '';
    }

    // INF is the "no character list given" marker: fall back to regex whitespace,
    // otherwise let rxClass() build the pattern fragment from the given characters.
    if (INF === $chars) {
      $pattern = '\s';
    } else {
      $pattern = self::rxClass($chars);
    }

    // Remove one or more matching characters anchored at the start of the string.
    return preg_replace("/^{$pattern}+/u", '', $text);
  }
3233
3234
  /**
3235
   * Returns the UTF-8 character with the maximum code point in the given data.
3236
   *
3237
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3238
   *
3239
   * @return   string The character with the highest code point than others.
3240
   */
3241 1 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is first glued together into one string.
    $haystack = is_array($arg) ? implode($arg) : $arg;

    // Pick the highest code point and turn it back into a character.
    $codePoints = self::codepoints($haystack);

    return self::chr(max($codePoints));
  }
3249
3250
  /**
3251
   * Calculates and returns the maximum number of bytes taken by any
3252
   * UTF-8 encoded character in the given string.
3253
   *
3254
   * @param    string $str The original Unicode string.
3255
   *
3256
   * @return   int An array of byte lengths of each character.
3257
   */
3258 1
  public static function max_chr_width($str)
  {
    // Per-character size list as produced by chr_size_list().
    $sizes = self::chr_size_list($str);

    // No entries at all: report a width of 0.
    if (count($sizes) < 1) {
      return 0;
    }

    return (int)max($sizes);
  }
3267
3268
  /**
3269
   * checks whether mbstring is available on the server
3270
   *
3271
   * @return   bool True if available, False otherwise
3272
   */
3273 2
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    // Side effect: whenever mbstring is present, its internal encoding is switched to UTF-8.
    mb_internal_encoding('UTF-8');

    return true;
  }
3283
3284
  /**
3285
   * Returns the UTF-8 character with the minimum code point in the given data.
3286
   *
3287
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3288
   *
3289
   * @return   string The character with the lowest code point than others.
3290
   */
3291 1 View Code Duplication
  public static function min($arg)
  {
    // An array of strings is first glued together into one string.
    $haystack = is_array($arg) ? implode($arg) : $arg;

    // Pick the lowest code point and turn it back into a character.
    $codePoints = self::codepoints($haystack);

    return self::chr(min($codePoints));
  }
3299
3300
  /**
3301
   * Normalize the encoding-name input.
3302
   *
3303
   * @param string $encodingLabel e.g.: ISO, UTF8, WINDOWS-1251 etc.
3304
   *
3305
   * @return string e.g.: ISO-8859-1, UTF-8, ISO-8859-5 etc.
3306
   */
3307 13
  public static function normalizeEncoding($encodingLabel)
  {
    // Known spellings (upper-cased, punctuation removed) mapped to the name that is returned.
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'WINDOWS1251' => 'ISO-8859-5',
    );

    // Build the lookup key: upper-case, then drop everything except letters, digits and whitespace.
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));

    // Unknown labels are handed back exactly as they were passed in.
    return empty($aliases[$lookupKey]) ? $encodingLabel : $aliases[$lookupKey];
  }
3335
3336
  /**
3337
   * Normalize MS Word special characters.
3338
   *
3339
   * @param string $str The string to be normalized.
3340
   *
3341
   * @return string
3342
   */
3343 2
  public static function normalize_msword($str)
  {
    // Search/replace lists are derived from self::$utf8MSWord once and then reused on later calls.
    static $search = null;
    static $replace = null;

    if (null === $search) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replace, $str);
  }
3355
3356
  /**
3357
   * Normalize the whitespace.
3358
   *
3359
   * @param string $str                       The string to be normalized.
3360
   * @param bool   $keepNonBreakingSpace      Set to true, to keep non-breaking-spaces.
3361
   * @param bool   $keepBidiUnicodeControls   Set to true, to keep non-printable (for the web) bidirectional text chars.
3362
   *
3363
   * @return string
3364
   */
3365 8
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // Per-variant whitespace lists (index 0: replace NBSP, index 1: keep NBSP), built lazily.
    static $whitespaces = array();
    // List of bidi control characters, built lazily on first use.
    static $bidiUniCodeControls = null;

    // Only a strict boolean true keeps the non-breaking space. The cache key is derived
    // from that very same test, so a truthy non-boolean argument (e.g. 1) can no longer
    // store the full table under the "keep NBSP" key and spoil later calls.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($whitespaces[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    // Bidi control characters are removed entirely unless the caller passes strict true.
    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    // Every listed whitespace character becomes a plain space.
    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3394
3395
  /**
3396
   * Format a number with grouped thousands.
3397
   *
3398
   * @param float  $number
3399
   * @param int    $decimals
3400
   * @param string $dec_point
3401
   * @param string $thousands_sep
3402
   *
3403
   * @return string
3404
   */
3405 1
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    // A separator longer than one byte is what old PHP versions cannot handle natively.
    $hasMultiByteSeparator = isset($thousands_sep[1]) || isset($dec_point[1]);

    // PHP >= 5.4 accepts multi-byte separators itself, so the emulation below is only
    // needed on older versions (the previous check was inverted and emulated on >= 5.4).
    if ($hasMultiByteSeparator === false || Bootup::is_php('5.4') === true) {
      return number_format($number, $decimals, $dec_point, $thousands_sep);
    }

    // Emulation: format with single-byte separators, then swap them in ONE pass.
    // strtr() never re-scans text it has already replaced, so a "," inside $dec_point
    // is not turned into $thousands_sep afterwards (sequential str_replace() did that).
    return strtr(
        number_format($number, $decimals, '.', ','),
        array(
            '.' => $dec_point,
            ',' => $thousands_sep,
        )
    );
  }
3425
3426
  /**
3427
   * Calculates Unicode code point of the given UTF-8 encoded character.
3428
   *
3429
   * @param    string $s The character of which to calculate code point.
3430
   *
3431
   * @return   int Unicode code point of the given character,<br />
3432
   *           0 on invalid UTF-8 byte sequence.
3433
   */
3434 15
  public static function ord($s)
  {
    // Empty input yields 0. The string "0" is falsy in PHP but is a real character
    // (code point 48), so it must not be caught by this guard.
    if (!$s && $s !== '0') {
      return 0;
    }

    // Look at no more than the first four bytes; unpack('C*') returns them 1-indexed.
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // Lead byte >= 0xF0 with four bytes available: decode as a 4-byte sequence.
    if (0xF0 <= $lead && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // Lead byte >= 0xE0 with three bytes available: decode as a 3-byte sequence.
    if (0xE0 <= $lead && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // Lead byte >= 0xC0 with two bytes available: decode as a 2-byte sequence.
    if (0xC0 <= $lead && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // Anything else: the value of the first byte itself.
    return $lead;
  }
3457
3458
  /**
3459
   * Parses the string into variables.
3460
   *
3461
   * WARNING: This differs from parse_str() by returning the results
3462
   *    instead of placing them in the local scope!
3463
   *
3464
   * @link http://php.net/manual/en/function.parse-str.php
3465
   *
3466
   * @param string $str     <p>
3467
   *                        The input string.
3468
   *                        </p>
3469
   * @param array  $result  <p>
3470
   *                        If the second parameter arr is present,
3471
   *                        variables are stored in this variable as array elements instead.
3472
   *                        </p>
3473
   *
3474
   * @return void
3475
   */
3476 1
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    // The input goes through self::filter() first; mb_parse_str() then writes the
    // parsed variables into the by-reference $result.
    mb_parse_str(self::filter($str), $result);
  }
3485
3486
  /**
3487
   * checks if \u modifier is available that enables Unicode support in PCRE.
3488
   *
3489
   * @return   bool True if support is available, false otherwise
3490
   */
3491 30
  public static function pcre_utf8_support()
  {
    // Probe with an empty pattern that carries the "u" modifier; any warning is silenced.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3496
3497
  /**
3498
   * Create an array containing a range of UTF-8 characters.
3499
   *
3500
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
3501
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
3502
   *
3503
   * @return   array
3504
   */
3505 1
  public static function range($var1, $var2)
  {
    // Any falsy endpoint yields an empty result.
    if (!$var1 || !$var2) {
      return array();
    }

    // Resolve start and end (in that order) with the same three-way rule:
    // decimal digits -> integer, hex digits -> hex_to_int(), anything else -> ord().
    $bounds = array();
    foreach (array($var1, $var2) as $endpoint) {
      if (ctype_digit((string)$endpoint)) {
        $codePoint = (int)$endpoint;
      } elseif (ctype_xdigit($endpoint)) {
        $codePoint = (int)self::hex_to_int($endpoint);
      } else {
        $codePoint = self::ord($endpoint);
      }

      // An endpoint that resolves to 0 (or another falsy value) aborts with an empty result.
      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    // Turn every code point between the two bounds into its character.
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3543
3544
  /**
3545
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
3546
   *
3547
   * @param string $str
3548
   *
3549
   * @return string
3550
   */
3551 7
  public static function removeBOM($str = '')
  {
    // Byte-order marks in the order they are tested. Every mark is checked against the
    // (possibly already shortened) string, exactly like a chain of independent "if"s.
    $byteOrderMarks = array(
        pack('CCCC', 0x00, 0x00, 0xfe, 0xff), // UTF-32 (BE)
        pack('CCCC', 0xff, 0xfe, 0x00, 0x00), // UTF-32 (LE)
        pack('CCC', 0xef, 0xbb, 0xbf),        // UTF-8
        pack('CC', 0xfe, 0xff),               // UTF-16 (BE)
        pack('CC', 0xff, 0xfe),               // UTF-16 (LE)
    );

    foreach ($byteOrderMarks as $bom) {
      $bomLength = strlen($bom);

      /** @noinspection SubStrUsedAsStrPosInspection */
      if (substr($str, 0, $bomLength) == $bom) {
        $str = substr($str, $bomLength);
      }
    }

    return $str;
  }
3591
3592
  /**
3593
   * Removes duplicate occurrences of a string in another string.
3594
   *
3595
   * @param    string       $str  The base string
3596
   * @param    string|array $what String to search for in the base string
3597
   *
3598
   * @return   string The result string with removed duplicates
3599
   */
3600 1
  public static function remove_duplicates($str, $what = ' ')
  {
    // A single search string is handled like a one-element list.
    if (is_string($what)) {
      $what = array($what);
    }

    // Any other non-array value leaves the input untouched.
    if (is_array($what)) {
      foreach ($what as $item) {
        // Collapse every run of consecutive $item occurrences into a single one.
        // The replacement comes from a callback so that it is inserted literally:
        // as a plain preg_replace() replacement, "$n" / "\n" inside $item would be
        // read as back-references (e.g. '$5' was replaced by an empty string).
        $str = preg_replace_callback(
            '/(' . preg_quote($item, '/') . ')+/',
            function () use ($item) {
              return (string)$item;
            },
            $str
        );
      }
    }

    return $str;
  }
3614
3615
  /**
3616
   * Remove Invisible Characters
3617
   *
3618
   * This prevents sandwiching null characters
3619
   * between ascii characters, like Java\0script.
3620
   *
3621
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
3622
   *
3623
   * @param  string $str
3624
   * @param  bool   $url_encoded
3625
   *
3626
   * @return  string
3627
   */
3628 36
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // Every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09).
    if ($url_encoded) {
      // The "i" modifier matters: percent-encoded hex digits may be upper-case
      // ("%0B", "%1F"), and those spellings slipped through the case-sensitive patterns.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until a pass removes nothing, so that sequences which only appear
    // after a removal (e.g. "%%0B0B") are stripped as well.
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3648
3649
  /**
3650
   * replace diamond question mark (�)
3651
   *
3652
   * @param string $str
3653
   * @param string $unknown
3654
   *
3655
   * @return string
3656
   */
3657 36
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // The replacement character, once as an escaped byte sequence and once as a literal.
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // Both needles are swapped for the same substitute.
    $substitutes = array(
        $unknown,
        $unknown,
    );

    return str_replace($needles, $substitutes, $str);
  }
3671
3672
  /**
   * Strip whitespace (or other characters) from the end of a UTF-8 string.
   *
   * WARNING: This is much slower than the native "rtrim()".
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted,
   *                         everything matched by the PCRE class "\s" is stripped
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $chars = INF === $chars ? '\s' : self::rxClass($chars);

    // "\z" anchors at the absolute end of the subject. A plain "$" also matches
    // right before a final newline, which would strip characters that are not
    // actually trailing (e.g. the "-" in "abc-\n" when trimming "-").
    return preg_replace("/{$chars}+\\z/u", '', $str);
  }
3694
3695
  /**
   * Build a regular-expression fragment that matches any one of the given characters.
   *
   * Units returned by str_split() that consist of a single code point are
   * collected into one bracket expression; units spanning several code points
   * are appended as alternatives next to that bracket expression.
   *
   * @param string $s     The characters to match.
   * @param string $class Raw character-class content put in front of the characters, e.g. "\pL".
   *
   * @return string A bracket expression, or a non-capturing group of alternatives.
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $s = (string)$s;
    $class = (string)$class;

    // The cache is keyed by both arguments separately. A concatenated key would
    // make different argument pairs, e.g. ("\pL", "") and ("", "\pL"), share
    // one entry and hand back the wrong fragment.
    if (isset($rxClassCache[$class][$s])) {
      return $rxClassCache[$class][$s];
    }

    $bracket = $class;
    $alternatives = array();

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // presumably moved to the front so that it is not read as a range operator
        $bracket = '-' . $bracket;
      } elseif (!isset($char[2])) {
        // at most two bytes long: quote it for use in a "/"-delimited pattern
        $bracket .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // longer, but still one character: usable inside the brackets as it is
        $bracket .= $char;
      } else {
        // several characters: cannot go inside the brackets
        $alternatives[] = $char;
      }
    }

    $bracket = '[' . $bracket . ']';

    if (count($alternatives) === 0) {
      $return = $bracket;
    } else {
      $return = '(?:' . $bracket . '|' . implode('|', $alternatives) . ')';
    }

    $rxClassCache[$class][$s] = $return;

    return $return;
  }
3739
3740
  /**
   * Print every value of the internal support table, each followed by a
   * newline and a "<br>" tag, e.g. for debugging.
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportEntry) {
      echo $supportEntry, "\n<br>";
    }
  }
3749
3750
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   *
   * @return   string The HTML numbered entity, or an empty string for empty input.
   */
  public static function single_chr_html_encode($chr)
  {
    $chr = (string)$chr;

    // Test for emptiness by length: a plain truthiness check would also
    // reject the character "0" and return an empty string for it.
    if (!isset($chr[0])) {
      return '';
    }

    return '&#' . self::ord($chr) . ';';
  }
3765
3766
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *                              (only applied when PCRE UTF-8 support is used).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // "." in UTF-8 + dotall mode matches exactly one character, newlines included
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: walk the bytes and group them by the bit pattern of the lead byte
      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // 0xxxxxxx: single byte
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // 110xxxxx: two bytes, the second must be a 10xxxxxx continuation byte
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = substr($str, $pos, 2);

            $pos++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // 1110xxxx: three bytes
          $secondOk = ($str[$pos + 1] & "\xC0") === "\x80";
          if ($secondOk && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = substr($str, $pos, 3);

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // 11110xxx: four bytes
          $secondOk = ($str[$pos + 1] & "\xC0") === "\x80";
          $thirdOk = $secondOk && (($str[$pos + 2] & "\xC0") === "\x80");
          if ($thirdOk && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = substr($str, $pos, 4);

            $pos += 3;
          }
        }
        // any other byte is skipped without being added to the result
      }
    }

    if ($length > 1) {
      // glue every $length characters together into one element
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
3844
3845
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * A leading byte-order mark decides first. Without one, "mb_detect_encoding()"
   * is asked, and for input that is_binary() reports as binary the result of
   * is_utf16() / is_utf32() takes precedence.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    // Byte-order marks, longest first: the UTF-32LE mark starts with the same
    // two bytes as the UTF-16LE mark, so it has to be tested before it.
    $byteOrderMarks = array(
        "\xef\xbb\xbf"     => 'UTF-8',
        "\x00\x00\xfe\xff" => 'UTF-32BE',
        "\xff\xfe\x00\x00" => 'UTF-32LE',
        "\xfe\xff"         => 'UTF-16BE',
        "\xff\xfe"         => 'UTF-16LE',
    );

    foreach ($byteOrderMarks as $bom => $bomEncoding) {
      if (strncmp($str, $bom, strlen($bom)) === 0) {
        return $bomEncoding;
      }
    }

    self::checkForSupport();

    // For UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always.
    $detectOrder = array(
        'UTF-8',
        'windows-1251',
        'windows-1252',
        'ISO-8859-1',
    );
    $encoding = mb_detect_encoding($str, $detectOrder, true);

    if (self::is_binary($str)) {
      // 1 is mapped to little endian and 2 to big endian, for both helpers
      $utf16 = self::is_utf16($str);
      if ($utf16 == 1) {
        return 'UTF-16LE';
      }
      if ($utf16 == 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 == 1) {
        return 'UTF-32LE';
      }
      if ($utf32 == 2) {
        return 'UTF-32BE';
      }
    }

    if (!$encoding) {
      return false;
    }

    return $encoding;
  }
3924
3925
  /**
   * Case-insensitive, UTF-8 aware replacement of all occurrences of the search value(s).
   *
   * @param string|array $search  The value(s) to look for; an empty needle never matches.
   * @param string|array $replace The replacement value(s); they are inserted literally.
   * @param string|array $subject The string(s) being searched and replaced on.
   * @param null|int     $count   [optional] If passed, this will hold the number of replacements.
   *
   * @return string|array|null The subject with the replacements applied
   *                           (null when the regex engine reports an error).
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle .= '';

      if ('' === $needle) {
        // can never match: demands a character before the start of the subject
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() expands "$n" and "\n" inside the replacement text. Escape
    // "\" and "$" so that the text is inserted literally, like str_ireplace() does.
    $escapes = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = strtr((string)$value, $escapes);
      }
    } else {
      $replacement = strtr((string)$replace, $escapes);
    }

    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
3953
3954
  /**
   * Shorten a string to at most $length characters and append $strAddOn,
   * cutting at the last space inside the limit where there is one.
   *
   * @param  string $str      The input string.
   * @param  int    $length   Maximum number of characters to keep.
   * @param  string $strAddOn Text appended to a shortened string.
   *
   * @return string The input itself if it fits into $length, otherwise the shortened text plus $strAddOn.
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls exactly behind a space: cut right in front of that space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // otherwise drop the (possibly cut off) last word of the truncated text
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ('' === $withoutLastWord) {
      // there was no space at all: fall back to a hard cut
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
3992
3993
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string; the input is returned unchanged when
   *                  $pad_length is not a positive integer, is shorter than the input,
   *                  or when $pad_string is empty
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || ($pad_length <= 0) || ($pad_length < $input_length)) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with; this also keeps the divisions below away from zero
      return $input;
    }

    // number of characters that have to be added
    $diff = $pad_length - $input_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the bigger one
        $repeats = (int)ceil($diff / $ps_length / 2);
        $pre = self::substr(str_repeat($pad_string, $repeats), 0, (int)($diff / 2));
        $post = self::substr(str_repeat($pad_string, $repeats), 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $input . $post;
  }
4038
4039
  /**
   * Repeat a string.
   *
   * The input is passed through filter() before it is repeated.
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           </p>
   *                           <p>
   *                           It has to be greater than or equal to 0; with 0
   *                           the function returns an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return str_repeat(self::filter($input), $multiplier);
  }
4063
4064
  /**
   * INFO: this is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for (the needle); an array
   *                       may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement for found search values; an array
   *                       may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on
   *                       (the haystack). For an array, every entry is
   *                       processed and an array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4098
4099
  /**
   * Shuffles all the characters in the string.
   *
   * @param    string $str The input string
   *
   * @return   string The characters of the input in random order.
   */
  public static function str_shuffle($str)
  {
    // split into whole characters first, so that multi-byte characters stay intact
    $characters = self::split($str);

    shuffle($characters);

    return implode('', $characters);
  }
4114
4115
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice leaves exactly one entry per distinct value
      $codePoints = array_flip(array_flip($codePoints));
    }

    $desc ? arsort($codePoints) : asort($codePoints);

    return self::string($codePoints);
  }
4140
4141
  /**
   * Convert a string to an array, split on grapheme-cluster boundaries.
   *
   * @param string $str The input string.
   * @param int    $len Number of units per array element.
   *
   * @return array The pieces of the string.
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();

    if (1 > $len = (int)$len) {
      // lengths below 1 are handed to the native function
      $len = func_get_arg(1);

      return str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $clusters = array();
      $position = 0;
      $byteLength = strlen($str);
      while ($position < $byteLength) {
        // the last argument receives the offset at which the next unit starts
        $clusters[] = grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $position, $position);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $clusters = $matches[0];
    }

    if (1 == $len) {
      return $clusters;
    }

    // join every $len consecutive units into one element
    return array_map('implode', array_chunk($clusters, $len));
  }
4190
4191
  /**
   * Get a binary representation of the input.
   *
   * The input is processed byte by byte; the value ord() returns for each
   * byte is written as a zero-padded number of at least eight binary digits.
   *
   * @param   string $str The input character.
   *
   * @return  string The concatenated binary digits, or an empty string for empty input.
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $bits = null;
    $byteCount = strlen($str);
    $index = 0;

    while ($index < $byteCount) {
      $bits .= vsprintf('%08b', (array)self::ord($str[$index]));
      ++$index;
    }

    return $bits;
  }
4217
4218
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   *
   * Every non-ASCII character is decoded to its code point and looked up in a
   * table that is loaded on demand from "data/xNN.php", where NN is the code
   * point shifted right by eight bits (the "bank"). Characters without a table
   * entry are replaced by $unknown.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Start from scratch for every character: without this reset, a lead byte
      // that matches none of the ranges below would silently reuse the code
      // point of the previously handled character.
      $ord = null;

      $ordC1 = ord($c[1]);

      // two bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = ord($c[2]);

        // three bytes
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = ord($c[3]);

          // four bytes
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = ord($c[4]);

            // five bytes
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = ord($c[5]);

              // six bytes
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // lead bytes 254/255 and anything that could not be decoded
      if ($ordC0 >= 254 || !isset($ord)) {
        $c = $unknown;
        continue;
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // presumably the included file fills $UTF8_TO_ASCII[$bank] - verify against the data files
          /** @noinspection PhpIncludeInspection */
          include $bankfile;
        }

        // no file, or a file that did not define the bank: remember it as empty
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the loop
    unset($c);

    return implode('', $chars);
  }
4328
4329
  /**
   * Counts number of words in the UTF-8 string.
   *
   * A word is a run of letters (plus the characters from $charlist), optionally
   * continued across a dash or an apostrophe.
   *
   * @param string $s        The input string.
   * @param int    $format   0: return the number of words,
   *                         1: return a list of the words,
   *                         2: return the words keyed by their character offset in the input.
   * @param string $charlist Additional characters that count as part of a word.
   *
   * @return array|float|string The number of words in the string, or the words themselves
   */
  public static function str_word_count($s, $format = 0, $charlist = '')
  {
    $wordChar = self::rxClass($charlist, '\pL');

    // With the delimiter captured, the result alternates between non-word text
    // (even indexes) and words (odd indexes).
    $parts = preg_split("/({$wordChar}+(?:[\p{Pd}’']{$wordChar}+)*)/u", $s, -1, PREG_SPLIT_DELIM_CAPTURE);
    $partCount = count($parts);

    if (1 == $format) {
      $words = array();
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if (2 == $format) {
      self::checkForSupport();

      $words = array();
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$offset] = $parts[$i];
        // skip over this word and the non-word text that follows it
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($partCount - 1) / 2;
  }
4363
4364
  /**
   * Case-insensitive string comparison.
   *
   * Both strings are passed through strtocasefold() and then compared with strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4376
4377
  /**
   * String comparison.
   *
   * Strings that are not byte-identical are compared in their NFD-normalized form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    // identical input: nothing to normalize
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $normalized1 = Normalizer::normalize($str1, Normalizer::NFD);
    $normalized2 = Normalizer::normalize($str2, Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4394
4395
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      The string to examine.
   * @param string $charlist The characters that end the segment.
   * @param int    $start    Offset at which the examined part of $str starts.
   * @param int    $len      Length of the examined part of $str.
   *
   * @return int|null The number of characters in front of the first character from $charlist
   *                  (the whole length if none occurs); null for an empty $charlist.
   */
  public static function strcspn($str, $charlist, $start = 0, $len = 2147483647)
  {
    $charlist = (string)$charlist;
    if ('' === $charlist) {
      return null;
    }

    // only cut the string when a start or a length was actually given
    $needsCut = $start || 2147483647 != $len;
    $str = $needsCut ? (string)self::substr($str, $start, $len) : (string)$str;

    // lazily capture everything in front of the first character from the list
    $pattern = '/^(.*?)' . self::rxClass($charlist) . '/us';
    if (preg_match($pattern, $str, $match)) {
      return self::strlen($match[1]);
    }

    return self::strlen($str);
  }
4424
4425
  /**
   * Makes a UTF-8 string from code points.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    // every code point is converted by this class' own chr() method
    $toChar = array('\\voku\\helper\\UTF8', 'chr');
    $characters = array_map($toChar, $array);

    return implode($characters);
  }
4444
4445
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    // the check is done by is_bom() on the first three bytes
    $firstThreeBytes = substr($str, 0, 3);

    return self::is_bom($firstThreeBytes);
  }
4456
4457
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The string is passed through clean() before the tags are stripped.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags which should not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 first
    return strip_tags(self::clean($str), $allowable_tags);
  }
4483
4484
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack to start searching
   *                           </p>
   * @param string  $encoding  The character encoding; a boolean is accepted for old callers
   *                           and treated as "UTF-8"
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack,
   *                   or false if needle is not found or one of the two strings is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be found in, or with, an empty string
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    return mb_stripos($haystack, $needle, $offset, $encoding);
  }
4531
4532
  /**
   * Case-insensitive search for the first occurrence of needle; returns a part of the string.
   *
   * The work is delegated to mb_stristr() with the encoding fixed to UTF-8.
   *
   * @param string $str           The string to search in.
   * @param string $needle        The string to look for; an empty needle yields false.
   * @param bool   $before_needle Passed through to mb_stristr() to select which part
   *                              of the string is returned.
   *
   * @return false|string The portion reported by mb_stristr(), or false.
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    return mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
4552
4553
  /**
   * Get the string length in characters, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being measured.
   * @param string  $encoding  Charset handed to mb_strlen(); a boolean value is replaced
   *                           by 'UTF-8' (fallback for old call signatures).
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string first
   *                           (only applied when the encoding is 'UTF-8').
   *
   * @return int The number of characters in str for the given encoding;
   *             a multi-byte character is counted as 1. Empty input yields 0.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return mb_strlen($str, $encoding);
  }
4589
4590
  /**
   * Case-insensitive string comparison using a "natural order" algorithm.
   *
   * Both strings are case-folded with self::strtocasefold() and then compared
   * with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2, > 0 if str1 is greater than str2,
   *             and 0 if they are equal.
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $foldedFirst = self::strtocasefold($str1);
    $foldedSecond = self::strtocasefold($str2);

    return self::strnatcmp($foldedFirst, $foldedSecond);
  }
4603
4604
  /**
   * String comparison using a "natural order" algorithm.
   *
   * Identical strings short-circuit to 0; otherwise both strings are passed through
   * self::strtonatfold() and compared with PHP's native strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2,
   *             and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4617
4618
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are case-folded with self::strtocasefold() and then compared
   * with self::strncmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len Number of characters to compare.
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2,
   *             and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $foldedFirst = self::strtocasefold($str1);
    $foldedSecond = self::strtocasefold($str2);

    return self::strncmp($foldedFirst, $foldedSecond, $len);
  }
4631
4632
  /**
   * Comparison of the first n characters of two strings.
   *
   * Each string is cut to its first $len characters with self::substr();
   * the two prefixes are then compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len Number of characters to compare.
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    $firstPrefix = self::substr($str1, 0, $len);
    $secondPrefix = self::substr($str2, 0, $len);

    return self::strcmp($firstPrefix, $secondPrefix);
  }
4647
4648
  /**
   * Search a string for any of a set of characters.
   *
   * A regular expression is built from self::rxClass($charList) (presumably a
   * character class for the listed characters - see that helper) and matched
   * in UTF-8 mode. On a match, the rest of the string starting at the first
   * matched text is returned.
   *
   * @param string $s        The string to search in.
   * @param string $charList The characters to look for.
   *
   * @return string|false The string from the first match onwards, or false if nothing matched.
   */
  public static function strpbrk($s, $charList)
  {
    $pattern = '/' . self::rxClass($charList) . '/us';

    if (!preg_match($pattern, $s, $hit)) {
      return false;
    }

    // cut at the byte position of the matched text
    return substr($s, strpos($s, $hit[0]));
  }
4664
4665
  /**
   * Find the position of the first occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack     The string being searched.
   * @param string  $needle       The string to find in haystack. Non-string values are
   *                              cast to string (an integer 1 is searched as "1"),
   *                              exactly as stripos() does.
   * @param int     $offset       [optional] The search offset in characters. If it is not
   *                              specified, 0 is used.
   * @param string  $encoding     [optional] Charset handed to mb_strpos(); a boolean value is
   *                              replaced by 'UTF-8' (fallback for old call signatures).
   * @param boolean $cleanUtf8    [optional] Clean non UTF-8 chars from both strings first.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack
   *                   string, or false if needle is not found or either string is empty.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    // NOTE: a former "integer needle -> self::chr()" branch sat behind this cast and
    // therefore could never run; it was removed without changing any result.
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): this branch is keyed on the 'iconv' flag but calls a grapheme_*
    // function - confirm that this pairing is intended
    if (self::$support['iconv'] === true) {
      return grapheme_strpos($haystack, $needle, $offset);
    }

    // fallback: byte-wise search, then convert the byte position into a character position

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
4739
4740
  /**
   * Find the last occurrence of a character in a string within another.
   *
   * Thin wrapper around mb_strrchr().
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] Determines which portion of haystack is returned.
   *                         If true, everything from the beginning up to the last occurrence
   *                         of needle; if false, everything from the last occurrence of
   *                         needle to the end.
   * @param string $encoding [optional] Character encoding name to use (default 'UTF-8').
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
4774
4775
  /**
   * Reverse the order of the characters in a string.
   *
   * The string is broken into pieces by self::split(), the pieces are reversed
   * and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
4786
4787
  /**
   * Find the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around mb_strrichr().
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] Determines which portion of haystack is returned.
   *                         If true, everything from the beginning up to the last occurrence
   *                         of needle; if false, everything from the last occurrence of
   *                         needle to the end.
   * @param string $encoding [optional] Character encoding name to use (default 'UTF-8').
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
4821
4822
  /**
   * Find the position of the last occurrence of a string, case insensitive.
   *
   * Both strings are lower-cased with self::strtolower() and the search itself
   * is delegated to self::strrpos().
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int|false The result of self::strrpos() on the lower-cased strings.
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
4835
4836
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string  $haystack     The string being checked for the last occurrence of needle.
   * @param string  $needle       The string to find in haystack. Non-string values are
   *                              cast to string (an integer 1 is searched as "1").
   * @param int     $offset       [optional] May be specified to begin searching an arbitrary number
   *                              of characters into the string. Negative values will stop searching
   *                              at an arbitrary point prior to the end of the string.
   *                              null (the default) is treated as 0.
   * @param boolean $cleanUtf8    [optional] Clean non UTF-8 chars from both strings first.
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack
   *                   string, or false if needle is not found or either string is empty.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    // NOTE: a former "integer needle -> self::chr()" branch sat behind this cast and
    // therefore could never run; it was removed without changing any result.
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): this branch is keyed on the 'iconv' flag but calls a grapheme_*
    // function - confirm that this pairing is intended
    if (self::$support['iconv'] === true) {
      return grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: byte-wise search, then convert the byte position into a character position

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    if (($pos = strrpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
4909
4910
  /**
   * Find the length of the initial segment of a string consisting entirely of
   * characters contained within a given mask.
   *
   * When a start or a non-default length is given, the string is first cut with
   * self::substr(). The leading run is then matched with a regular expression built
   * from self::rxClass($mask) and measured with self::strlen().
   *
   * @param string $s     The string to examine.
   * @param string $mask  The allowed characters.
   * @param int    $start Position at which to start examining.
   * @param int    $len   Length of the segment to examine (2147483647 means "no limit").
   *
   * @return int|null Length in characters of the matching initial segment, 0 if there is none.
   */
  public static function strspn($s, $mask, $start = 0, $len = 2147483647)
  {
    if ($start || 2147483647 != $len) {
      $s = self::substr($s, $start, $len);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $s, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
4929
4930
  /**
   * Return part of haystack from the first occurrence of needle to the end of haystack.
   *
   * Thin wrapper around grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      The input string. Must be valid UTF-8.
   * @param string $needle        The string to look for. Must be valid UTF-8.
   * @param bool   $before_needle [optional] If <b>TRUE</b>, the part of the haystack before
   *                              the first occurrence of the needle (excluding the needle)
   *                              is returned instead.
   *
   * @return string|false The portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
4954
4955
  /**
   * Unicode transformation for case-less matching.
   *
   * Steps: replace the entries of self::$commonCaseFold, optionally replace the
   * entries of the "caseFolding_full" data set, clean the string and lower-case it.
   * The lookup tables are built once and cached in static variables.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str  The string to fold.
   * @param bool   $full Also apply the full case-folding table.
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    static $fullTable = null;
    static $commonSearch = null;
    static $commonReplace = null;

    // build the common search/replace lists on first use only
    if ($commonSearch === null) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($commonSearch, $commonReplace, $str);

    if ($full) {
      // load the full table lazily
      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullTable[0], $fullTable[1], $str);
    }

    return self::strtolower(self::clean($str));
  }
4992
4993
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      The string being lowercased.
   * @param string $encoding Charset handed to mb_strtolower().
   *
   * @return string str with all alphabetic characters converted to lowercase;
   *                an empty string for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    return mb_strtolower($str, $encoding);
  }
5019
5020
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to form NFD and every run of characters matching
   * \p{Mn} is removed afterwards.
   *
   * @param string $s
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = Normalizer::normalize($s, Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5031
5032
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when mbstring is available; otherwise the string is cleaned
   * and converted with the mapping returned by self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      The string being uppercased.
   * @param string $encoding Charset handed to mb_strtoupper().
   *
   * @return string str with all alphabetic characters converted to uppercase;
   *                an empty string for empty input.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // fallback lookup lists, filled on first use
    static $lowerChars = null;
    static $upperChars = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return mb_strtoupper($str, $encoding);
    }

    // fallback

    if ($lowerChars === null) {
      $caseTable = self::case_table();
      $lowerChars = array_keys($caseTable);
      $upperChars = array_values($caseTable);
    }

    return str_replace($lowerChars, $upperChars, self::clean($str));
  }
5075
5076
  /**
   * Translate characters or replace sub-strings.
   *
   * With three arguments, $from and $to are split via self::str_split(), cut to the
   * same number of entries and combined into a replacement map. With two arguments,
   * $from is handed to PHP's strtr() unchanged, so it has to be a replacement map already.
   *
   * @param string       $s    The string being translated.
   * @param string|array $from The characters to replace, or (two-argument form) the replacement map.
   * @param string       $to   The replacement characters; INF (the default) selects the two-argument form.
   *
   * @return string
   */
  public static function strtr($s, $from, $to = INF)
  {
    if ($to !== INF) {
      $search = self::str_split($from);
      $replace = self::str_split($to);

      // surplus entries on either side are ignored
      $pairs = min(count($search), count($replace));

      $from = array_combine(
          array_slice($search, 0, $pairs),
          array_slice($replace, 0, $pairs)
      );
    }

    return strtr($s, $from);
  }
5104
5105
  /**
   * Return the width of a string, as computed by mb_strwidth() for UTF-8.
   *
   * @param string $s The string being measured.
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = mb_strwidth($s, 'UTF-8');

    return $width;
  }
5119
5120
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       The string being cut.
   * @param int     $start     The first position used in str.
   * @param int     $length    [optional] The maximum length of the returned string.
   *                           null (the default) means "up to the end of the string".
   * @param string  $encoding  [optional] Charset handed to mb_substr(); a boolean value is
   *                           replaced by 'UTF-8' (fallback for old call signatures).
   *                           Only used when mbstring is available.
   * @param boolean $cleanUtf8 [optional] Clean non UTF-8 chars from the string first.
   *
   * @return string The portion of str specified by the start and length parameters;
   *                an empty string for empty input.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    $hasLength = ($length !== null);

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      // "no length" means "everything": measure the string in the same encoding that
      // mb_substr() uses, otherwise a non UTF-8 string can be reported shorter than it
      // is and the result gets truncated
      $length = $hasLength ? (int)$length : (int)self::strlen($str, $encoding);

      return mb_substr($str, $start, $length, $encoding);
    }

    // the remaining code paths do not use $encoding
    $length = $hasLength ? (int)$length : (int)self::strlen($str);

    // NOTE(review): this branch is keyed on the 'iconv' flag but calls a grapheme_*
    // function - confirm that this pairing is intended
    if (self::$support['iconv'] === true) {
      return (string)grapheme_substr($str, $start, $length);
    }

    // fallback

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode(array_slice($array, $start, $length));
  }
5186
5187
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The main string is cut to the requested window with self::substr(); the secondary
   * string is cut to the same number of characters before both parts are compared.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts
   *                                    counting from the end of the string.
   * @param int     $length             The length of the comparison (default: 2147483647).
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int The result of self::strcasecmp() or self::strcmp() on the two parts.
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5207
5208
  /**
   * Count the number of sub-string occurrences.
   *
   * When an offset and/or a length is given, the haystack is first cut to that window
   * (in characters) with self::substr(); the occurrences are then counted inside the
   * window only.
   *
   * @param    string   $haystack The string to search in.
   * @param    string   $needle   The string to search for.
   * @param    int      $offset   The offset (in characters) where to start counting.
   * @param    int|null $length   The maximum length (in characters) after the specified offset
   *                              to search for the substring; null means "up to the end".
   *
   * @return   int number of occurrences of $needle
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $offset = (int)$offset;

    // $length is compared against null explicitly: a length of 0 is a valid (empty)
    // window, and a missing length must not be turned into 0
    if ($offset !== 0 || $length !== null) {
      $haystack = self::substr($haystack, $offset, $length);
    }

    // the window has already been applied above - passing offset/length to the native
    // function again would apply them a second time (and byte-wise)
    return substr_count($haystack, $needle);
  }
5234
5235
  /**
5236
   * Replace text within a portion of a string.
5237
   *
5238
   * source: https://gist.github.com/stemar/8287074
5239
   *
5240
   * @param string|array $str
5241
   * @param string|array $replacement
5242
   * @param int          $start
5243
   * @param null|int     $length
5244
   *
5245
   * @return array|string
5246
   */
5247 6
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    // Array mode: normalise every argument to a list with one entry per
    // subject string, then process each subject through this same method.
    if (is_array($str)) {
      $num = count($str);

      // $replacement: cut to size, or repeat the scalar for every subject
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: entries that are not real integers fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$startTmp) {
          $startTmp = (int)$startTmp === $startTmp ? $startTmp : 0;
        }
        // release the reference that the loop left on the last element
        unset($startTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: missing -> 0 for every subject, non-integer entries -> $num
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$lengthTmp) {
          if (isset($lengthTmp)) {
            $lengthTmp = (int)$lengthTmp === $lengthTmp ? $lengthTmp : $num;
          } else {
            $lengthTmp = 0;
          }
        }
        unset($lengthTmp);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // String mode: of an array of replacements only index 0 is used
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // Split both strings into single characters ("s": "." also matches "\n")
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    // No length given: replace everything from $start to the end. The number
    // of split characters is exactly the amount array_splice() can remove.
    if ($length === null) {
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // glue first, pieces second (the array-first order is the legacy form)
    return implode('', $smatches[0]);
  }
5313
5314
  /**
5315
   * Returns a case swapped version of the string.
5316
   *
5317
   * @param string $str
5318
   * @param string $encoding
5319
   *
5320
   * @return string each character's case swapped
5321
   */
5322 1
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $cleaned = self::clean($str);

    // Flip every non-whitespace character on its own: a character that
    // already equals its upper-case form is lower-cased, anything else is
    // upper-cased.
    $flipCase = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return $char == $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $flipCase, $cleaned);
  }
5348
5349
  /**
5350
   * alias for "UTF8::to_ascii()"
5351
   *
5352
   * @param string $s The input string e.g. a UTF-8 String
5353
   * @param string $subst_chr
5354
   *
5355
   * @return string
5356
   */
5357 6
  public static function toAscii($s, $subst_chr = '?')
  {
    // camelCase alias: all work is delegated to to_ascii()
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
5361
5362
  /**
5363
   * alias for "UTF8::to_latin1()"
5364
   *
5365
   * @param $str
5366
   *
5367
   * @return string
5368
   */
5369
  public static function toLatin1($str)
  {
    // camelCase alias: all work is delegated to to_latin1()
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
5373
5374
  /**
5375
   * alias for "UTF8::to_utf8"
5376
   *
5377
   * @param string $str
5378
   *
5379
   * @return string
5380
   */
5381
  public static function toUTF8($str)
  {
    // camelCase alias: all work is delegated to to_utf8()
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
5385
5386
  /**
5387
   * convert to ASCII
5388
   *
5389
   * @param string $s The input string e.g. a UTF-8 String
5390
   * @param string $subst_chr
5391
   *
5392
   * @return string
5393
   */
5394 7
  public static function to_ascii($s, $subst_chr = '?')
  {
    // extra transliteration table, loaded on first use and kept between calls
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // nothing above 7-bit in the string: it is returned as cleaned
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = Normalizer::normalize($s, Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // Split into single characters. The "s" modifier lets "." match line
    // breaks as well; without it every "\n" would be missing from the
    // re-assembled result.
    preg_match_all('/./us', $s, $chars);

    /** @noinspection AlterInForeachInspection */
    foreach ($chars[0] as &$c) {

      // single-byte characters are kept as they are
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if ($t !== false && is_string($t)) {
          if (!isset($t[0])) {
            // empty result: handled below like an unknown character
            $t = '?';
          } elseif (isset($t[1])) {
            // strip the leading accent markers from multi-byte results
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // a failed iconv() call is handled like an unknown character instead
      // of silently writing "false" (an empty string) into the result
      if ($t === false) {
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          // last resort: keep the first byte of the decomposed (NFD) form
          // when it is ASCII, otherwise use the substitution character
          $t = Normalizer::normalize($c, Normalizer::NFD);

          if (is_string($t) && isset($t[0]) && $t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      // still "?" (e.g. because $subst_chr is "?"): try str_transliterate()
      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // release the reference that the loop left on the last element
    unset($c);

    return implode('', $chars[0]);
  }
5465
5466
  /**
5467
   * alias for "UTF8::to_win1252()"
5468
   *
5469
   * @param   string $str
5470
   *
5471
   * @return  array|string
5472
   */
5473
  public static function to_iso8859($str)
  {
    // alias: all work is delegated to to_win1252()
    $converted = self::to_win1252($str);

    return $converted;
  }
5477
5478
  /**
5479
   * alias for "UTF8::to_win1252()"
5480
   *
5481
   * @param string|array $str
5482
   *
5483
   * @return string|array
5484
   */
5485 2
  public static function to_latin1($str)
  {
    // alias: all work is delegated to to_win1252()
    $converted = self::to_win1252($str);

    return $converted;
  }
5489
5490
  /**
5491
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
5492
   *
5493
   * - It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
5494
   *
5495
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
5496
   *
5497
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
5498
   *    are followed by any of these:  ("group B")
5499
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
5500
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
5501
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
5502
   * is also a valid unicode character, and will be left unchanged.
5503
   *
5504
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
5505
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
5506
   *
5507
   * @param string|array $str Any string or array.
5508
   *
5509
   * @return string|array The same string (or array of strings), but UTF8 encoded.
5510
   */
5511 20
  public static function to_utf8($str)
  {
    // arrays are converted element by element (recursively)
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    // the string is walked byte by byte, so the byte length is needed
    $max = self::strlen($str, '8bit');

    $buf = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 7-bit byte: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      if ($ordC1 >= 0xc0) {
        // possible UTF8 lead byte: how many continuation bytes does it announce?
        if ($ordC1 <= 0xdf) {
          $following = 1; // looks like 2 bytes UTF8
        } elseif ($ordC1 <= 0xef) {
          $following = 2; // looks like 3 bytes UTF8
        } elseif ($ordC1 <= 0xf7) {
          $following = 3; // looks like 4 bytes UTF8
        } else {
          $following = 0; // 0xf8 - 0xff: doesn't look like UTF8
        }

        // every announced byte must exist and lie in 0x80 - 0xbf
        $sequence = $c1;
        $isUtf8 = ($following > 0) && ($i + $following < $max);
        for ($k = 1; $isUtf8 && $k <= $following; $k++) {
          $cn = $str[$i + $k];
          $isUtf8 = ($cn >= "\x80" && $cn <= "\xbf");
          $sequence .= $cn;
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $following;
          continue;
        }
      } elseif (isset(self::$win1252ToUtf8[$ordC1])) {
        // 0x80 - 0xbf byte found in the Windows-1252 special cases
        $buf .= self::$win1252ToUtf8[$ordC1];
        continue;
      }

      // not valid UTF8 - convert this single byte into a two-byte sequence.
      // Integer shifts / masks are used so that chr() never receives a float.
      $buf .= chr(0xc0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3f));
    }

    self::checkForSupport();

    // decode unicode escape sequences ("\uXXXX", four hex digits)
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode decimal numeric entities ("&#NN;" with two to four digits)
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
5617
5618
  /**
5619
   * Convert a string into win1252.
5620
   *
5621
   * @param  string|array $str
5622
   *
5623
   * @return string|array
5624
   */
5625 2
  protected static function to_win1252($str)
  {
    // strings are decoded directly
    if (is_string($str)) {
      return self::utf8_decode($str);
    }

    // anything that is neither a string nor an array is handed back untouched
    if (!is_array($str)) {
      return $str;
    }

    // arrays are converted element by element (recursively), keys are kept
    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      $str[$key] = self::to_win1252($value);
    }

    return $str;
  }
5641
5642
  /**
5643
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
5644
   *
5645
   * INFO: This is slower than "trim()"
5646
   *
5647
   * But we can only use the original-function, if we use <= 7-Bit in the string / chars
5648
   * but the check for ASCII (7-Bit) costs more time than we can save here.
5649
   *
5650
   * @param    string $str   The string to be trimmed
5651
   * @param    string $chars Optional characters to be stripped
5652
   *
5653
   * @return   string The trimmed string
5654
   */
5655 26
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // No character list given: strip Unicode separators (\pZ) and "other" /
    // control characters (\pC) from both ends.
    // The "nothing given" values are checked one by one, a plain truthiness
    // test would also swallow the perfectly valid character list "0".
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if (
        $chars === INF
        ||
        $chars === null
        ||
        $chars === false
        ||
        $chars === ''
        ||
        $chars === array()
    ) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    // explicit character list: trim the left side first, then the right side
    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
5670
5671
  /**
5672
   * Makes string's first char uppercase.
5673
   *
5674
   * @param    string $str The input string
5675
   *
5676
   * @return   string The resulting string
5677
   */
5678 14
  public static function ucfirst($str)
  {
    // upper-case only the first character, then glue the untouched rest back on
    $head = self::strtoupper(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
5682
5683
  /**
5684
   * alias for "UTF8::ucfirst"
5685
   *
5686
   * @param $str
5687
   *
5688
   * @return string
5689
   */
5690
  public static function ucword($str)
  {
    // alias: all work is delegated to ucfirst()
    $result = self::ucfirst($str);

    return $result;
  }
5694
5695
  /**
5696
   * Uppercase for all words in the string.
5697
   *
5698
   * @param  string $str
5699
   * @param array   $exceptions
5700
   *
5701
   * @return string
5702
   */
5703 8
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // only a really empty string is rejected here; a truthiness test would
    // also turn the input "0" into an empty string
    if (!isset($str[0])) {
      return '';
    }

    // exceptions are only consulted when a non-empty array was passed
    $useExceptions = is_array($exceptions) && count($exceptions) > 0;

    $newwords = array();

    // words are separated by single spaces only
    foreach (explode(' ', $str) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    // the very first character of the result is upper-cased in any case,
    // even if the first word is listed in $exceptions
    return self::ucfirst(implode(' ', $newwords));
  }
5736
5737
  /**
5738
   * Multi decode html entity & fix urlencoded-win1252-chars.
5739
   *
5740
   * e.g:
5741
   * 'D&#252;sseldorf'               => 'Düsseldorf'
5742
   * 'D%FCsseldorf'                  => 'Düsseldorf'
5743
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
5744
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
5745
   * 'Düsseldorf'                   => 'Düsseldorf'
5746
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
5747
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
5748
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
5749
   *
5750
   * @param string $str
5751
   *
5752
   * @return string
5753
   */
5754 1
  public static function urldecode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // native url-decoding first, then rewrite "%uXXXX" escapes (three or four
    // hex digits) into hexadecimal HTML entities
    $decoded = urldecode($str);
    $withEntities = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $decoded);

    // ENT_HTML5 is only referenced when Bootup reports PHP 5.4 or newer
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // to_utf8 -> html_entity_decode -> rawurldecode -> fix_simple_utf8
    $utf8 = self::to_utf8($withEntities);
    $entityFree = self::html_entity_decode($utf8, $flags);
    $fixed = self::fix_simple_utf8(rawurldecode($entityFree));

    return (string)$fixed;
  }
5777
5778
  /**
5779
   * Returns an array that maps url-encoded Windows-1252 bytes ("%XX") to UTF-8 characters.
5780
   *
5781
   * @return mixed
5782
   */
5783
  protected static function urldecode_fix_win1252_chars()
  {
    // Lookup table: url-encoded Windows-1252 byte ("%XX") => UTF-8 character.
    //
    // - "%80" is the EURO SIGN in Windows-1252 (see self::$win1252ToUtf8, key 128),
    //   it was mapped to a backtick before.
    // - "%A0" (NO-BREAK SPACE) and "%AD" (SOFT HYPHEN) are invisible characters,
    //   they are written as escape sequences so that they cannot get lost.
    // - NOTE(review): "%7F", "%81", "%8D", "%8F", "%90" and "%9D" map to an
    //   empty string as far as visible here - confirm that the file never
    //   carried raw control characters between these quotes.
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xc2\xa0", // NO-BREAK SPACE
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xc2\xad", // SOFT HYPHEN
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6014
6015
  /**
6016
   * Decodes an UTF-8 string to ISO-8859-1.
6017
   *
6018
   * @param string $str
6019
   *
6020
   * @return string
6021
   */
6022 6
  public static function utf8_decode($str)
  {
    // search / replace lists taken from self::$utf8ToWin1252, built on the
    // first call and reused afterwards
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($str);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    // apply the lookup table first, Xml::utf8_decode() does the rest
    $mapped = str_replace($search, $replace, $utf8);

    return Xml::utf8_decode($mapped);
  }
6045
6046
  /**
6047
   * Encodes an ISO-8859-1 string to UTF-8.
6048
   *
6049
   * @param string $str
6050
   *
6051
   * @return string
6052
   */
6053 6
  public static function utf8_encode($str)
  {
    // search / replace lists taken from self::$cp1252ToUtf8, built on first
    // use and reused afterwards
    static $search = null;
    static $replace = null;

    $encoded = utf8_encode($str);

    // the lookup table is only applied when the encoded string contains a
    // "\xC2" byte, otherwise the native result is returned as it is
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6072
6073
  /**
6074
   * fix -> utf8-win1252 chars
6075
   *
6076
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1
6077
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
6078
   * See: http://en.wikipedia.org/wiki/Windows-1252
6079
   *
6080
   * @deprecated use "UTF8::fix_simple_utf8()"
6081
   *
6082
   * @param   string $str
6083
   *
6084
   * @return  string
6085
   */
6086
  public static function utf8_fix_win1252_chars($str)
  {
    // deprecated alias: all work is delegated to fix_simple_utf8()
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6090
6091
  /**
6092
   * Returns an array with all utf8 whitespace characters.
6093
   *
6094
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
6095
   *
6096
   * @author: Derek E. [email protected]
6097
   *
6098
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
6099
   *         as defined in above URL
6100
   */
6101 1
  public static function whitespace_table()
  {
    // hands out the class-level lookup table unchanged
    $table = self::$whitespaceTable;

    return $table;
  }
6105
6106
  /**
6107
   * Limit the number of words in a string.
6108
   *
6109
   * @param  string $str
6110
   * @param  int    $words
6111
   * @param  string $strAddOn
6112
   *
6113
   * @return string
6114
   */
6115 1
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    // cast first, so that non-string scalars are not rejected by the
    // offset check below
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;

    // a limit below one word would build the invalid quantifier "{1,0}"
    if ($words < 1) {
      return '';
    }

    // optional leading whitespace, then up to $words runs of non-whitespace,
    // each followed by its trailing whitespace
    preg_match('/^\s*+(?:\S++\s*+){1,' . $words . '}/u', $str, $matches);

    // no match, or the match already covers the whole string: nothing to cut
    if (
        !isset($matches[0])
        ||
        self::strlen($str) === self::strlen($matches[0])
    ) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
6135
6136
  /**
6137
   * Wraps a string to a given number of characters.
6138
   *
6139
   * @param string $str
6140
   * @param int    $width
6141
   * @param string $break
6142
   * @param bool   $cut
6143
   *
6144
   * @return false|string Returns the given string wrapped at the specified length.
6145
   */
6146 4
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    // probe the native function with the given arguments, a "false" result
    // is passed on to the caller
    if (false === wordwrap('-', $width, $break, $cut)) {
      return false;
    }

    // always work with a string: the break is compared with "===" against
    // string elements further down
    $break = (string)$break;

    // $w is a mask of $str with exactly one byte per character:
    // "#" for an existing break, " " for a space, "?" for anything else.
    // $chars holds the real characters (and breaks) in the same order.
    $w = '';
    $lines = explode($break, $str);
    $iLen = count($lines);
    $chars = array();

    if (1 === $iLen && '' === $lines[0]) {
      return '';
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $iLen; ++$i) {

      if ($i) {
        $chars[] = $break;
        $w .= '#';
      }

      foreach (self::split($lines[$i]) as $c) {
        $chars[] = $c;
        $w .= ' ' === $c ? ' ' : '?';
      }
    }

    $result = '';
    $j = 0;
    $b = $i = -1;

    // wrap the mask with the native function, "#" marks every line end
    $w = wordwrap($w, $width, '#', $cut);

    // walk from "#" to "#" and copy the real characters that lie in between
    while (false !== $b = self::strpos($w, '#', $b + 1)) {
      for (++$i; $i < $b; ++$i) {
        $result .= $chars[$j];
        unset($chars[$j++]);
      }

      // the "#" stands for an existing break or for a space that was turned
      // into a break: drop that element. A "#" inserted by $cut has no
      // counterpart in $chars.
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
        unset($chars[$j++]);
      }

      $result .= $break;
    }

    // everything behind the last break
    return $result . implode('', $chars);
  }
6202
6203
  /**
6204
   * Returns an array of Unicode White Space characters.
6205
   *
6206
   * @return   array An array with numeric code point as key and White Space Character as value.
6207
   */
6208
  public static function ws()
  {
    // hands out the class-level whitespace list unchanged
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6212
6213
}
6214