Completed
Push — master ( 741bee...ec785e )
by Lars
13:24
created

UTF8::file_get_contents()   C

Complexity

Conditions 8
Paths 16

Size

Total Lines 47
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 16
CRAP Score 8.013

Importance

Changes 4
Bugs 2 Features 0
Metric Value
c 4
b 2
f 0
dl 0
loc 47
ccs 16
cts 17
cp 0.9412
rs 5.7377
cc 8
eloc 25
nc 16
nop 7
crap 8.013
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Intl\Normalizer\Normalizer;
7
use Symfony\Polyfill\Xml\Xml;
8
9
/**
10
 * UTF8-Helper-Class
11
 *
12
 * @package voku\helper
13
 */
14
class UTF8
15
{
16
  /**
17
   * @var array
18
   */
19
  protected static $win1252ToUtf8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
  );
48
49
  /**
50
   * @var array
51
   */
52
  protected static $cp1252ToUtf8 = array(
53
      '€' => '€',
54
      '‚' => '‚',
55
      'ƒ' => 'ƒ',
56
      '„' => '„',
57
      '…' => '…',
58
      '†' => '†',
59
      '‡' => '‡',
60
      'ˆ' => 'ˆ',
61
      '‰' => '‰',
62
      'Š' => 'Š',
63
      '‹' => '‹',
64
      'Œ' => 'Œ',
65
      'Ž' => 'Ž',
66
      '‘' => '‘',
67
      '’' => '’',
68
      '“' => '“',
69
      '”' => '”',
70
      '•' => '•',
71
      '–' => '–',
72
      '—' => '—',
73
      '˜' => '˜',
74
      '™' => '™',
75
      'š' => 'š',
76
      '›' => '›',
77
      'œ' => 'œ',
78
      'ž' => 'ž',
79
      'Ÿ' => 'Ÿ',
80
  );
81
82
  /**
83
   * Numeric code point => UTF-8 Character
84
   *
85
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
86
   *
87
   * @var array
88
   */
89
  protected static $whitespace = array(
90
      // NUL Byte
91
      0     => "\x0",
92
      // Tab
93
      9     => "\x9",
94
      // New Line
95
      10    => "\xa",
96
      // Vertical Tab
97
      11    => "\xb",
98
      // Carriage Return
99
      13    => "\xd",
100
      // Ordinary Space
101
      32    => "\x20",
102
      // NO-BREAK SPACE
103
      160   => "\xc2\xa0",
104
      // OGHAM SPACE MARK
105
      5760  => "\xe1\x9a\x80",
106
      // MONGOLIAN VOWEL SEPARATOR
107
      6158  => "\xe1\xa0\x8e",
108
      // EN QUAD
109
      8192  => "\xe2\x80\x80",
110
      // EM QUAD
111
      8193  => "\xe2\x80\x81",
112
      // EN SPACE
113
      8194  => "\xe2\x80\x82",
114
      // EM SPACE
115
      8195  => "\xe2\x80\x83",
116
      // THREE-PER-EM SPACE
117
      8196  => "\xe2\x80\x84",
118
      // FOUR-PER-EM SPACE
119
      8197  => "\xe2\x80\x85",
120
      // SIX-PER-EM SPACE
121
      8198  => "\xe2\x80\x86",
122
      // FIGURE SPACE
123
      8199  => "\xe2\x80\x87",
124
      // PUNCTUATION SPACE
125
      8200  => "\xe2\x80\x88",
126
      // THIN SPACE
127
      8201  => "\xe2\x80\x89",
128
      //HAIR SPACE
129
      8202  => "\xe2\x80\x8a",
130
      // LINE SEPARATOR
131
      8232  => "\xe2\x80\xa8",
132
      // PARAGRAPH SEPARATOR
133
      8233  => "\xe2\x80\xa9",
134
      // NARROW NO-BREAK SPACE
135
      8239  => "\xe2\x80\xaf",
136
      // MEDIUM MATHEMATICAL SPACE
137
      8287  => "\xe2\x81\x9f",
138
      // IDEOGRAPHIC SPACE
139
      12288 => "\xe3\x80\x80",
140
  );
141
142
  /**
143
   * @var array
144
   */
145
  protected static $whitespaceTable = array(
146
      'SPACE'                     => "\x20",
147
      'NO-BREAK SPACE'            => "\xc2\xa0",
148
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
149
      'EN QUAD'                   => "\xe2\x80\x80",
150
      'EM QUAD'                   => "\xe2\x80\x81",
151
      'EN SPACE'                  => "\xe2\x80\x82",
152
      'EM SPACE'                  => "\xe2\x80\x83",
153
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
154
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
155
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
156
      'FIGURE SPACE'              => "\xe2\x80\x87",
157
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
158
      'THIN SPACE'                => "\xe2\x80\x89",
159
      'HAIR SPACE'                => "\xe2\x80\x8a",
160
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
161
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
162
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
163
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
164
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
165
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
166
  );
167
168
  /**
169
   * bidirectional text chars
170
   *
171
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
172
   *
173
   * @var array
174
   */
175
  protected static $bidiUniCodeControlsTable = array(
176
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
177
    8234 => "\xE2\x80\xAA",
178
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
179
    8235 => "\xE2\x80\xAB",
180
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
181
    8236 => "\xE2\x80\xAC",
182
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
183
    8237 => "\xE2\x80\xAD",
184
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
185
    8238 => "\xE2\x80\xAE",
186
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
187
    8294 => "\xE2\x81\xA6",
188
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
189
    8295 => "\xE2\x81\xA7",
190
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
191
    8296 => "\xE2\x81\xA8",
192
    // POP DIRECTIONAL ISOLATE
193
    8297 => "\xE2\x81\xA9",
194
  );
195
196
  /**
197
   * @var array
198
   */
199
  protected static $commonCaseFold = array(
200
      'ſ'            => 's',
201
      "\xCD\x85"     => 'ι',
202
      'ς'            => 'σ',
203
      "\xCF\x90"     => 'β',
204
      "\xCF\x91"     => 'θ',
205
      "\xCF\x95"     => 'φ',
206
      "\xCF\x96"     => 'π',
207
      "\xCF\xB0"     => 'κ',
208
      "\xCF\xB1"     => 'ρ',
209
      "\xCF\xB5"     => 'ε',
210
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
211
      "\xE1\xBE\xBE" => 'ι',
212
  );
213
214
  /**
215
   * @var array
216
   */
217
  protected static $brokenUtf8ToUtf8 = array(
218
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
219
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
220
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
221
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
222
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
223
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
224
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
225
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
226
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
227
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
228
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
229
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
230
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
231
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
232
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
233
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
234
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
235
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
236
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
237
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
238
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
239
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
240
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
241
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
242
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
243
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
244
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
245
      'ü'       => 'ü',
246
      'ä'       => 'ä',
247
      'ö'       => 'ö',
248
      'Ö'       => 'Ö',
249
      'ß'       => 'ß',
250
      'Ã '       => 'à',
251
      'á'       => 'á',
252
      'â'       => 'â',
253
      'ã'       => 'ã',
254
      'ù'       => 'ù',
255
      'ú'       => 'ú',
256
      'û'       => 'û',
257
      'Ù'       => 'Ù',
258
      'Ú'       => 'Ú',
259
      'Û'       => 'Û',
260
      'Ü'       => 'Ü',
261
      'ò'       => 'ò',
262
      'ó'       => 'ó',
263
      'ô'       => 'ô',
264
      'è'       => 'è',
265
      'é'       => 'é',
266
      'ê'       => 'ê',
267
      'ë'       => 'ë',
268
      'À'       => 'À',
269
      'Á'       => 'Á',
270
      'Â'       => 'Â',
271
      'Ã'       => 'Ã',
272
      'Ä'       => 'Ä',
273
      'Ã…'       => 'Å',
274
      'Ç'       => 'Ç',
275
      'È'       => 'È',
276
      'É'       => 'É',
277
      'Ê'       => 'Ê',
278
      'Ë'       => 'Ë',
279
      'ÃŒ'       => 'Ì',
280
      'Í'       => 'Í',
281
      'ÃŽ'       => 'Î',
282
      'Ï'       => 'Ï',
283
      'Ñ'       => 'Ñ',
284
      'Ã’'       => 'Ò',
285
      'Ó'       => 'Ó',
286
      'Ô'       => 'Ô',
287
      'Õ'       => 'Õ',
288
      'Ø'       => 'Ø',
289
      'Ã¥'       => 'å',
290
      'æ'       => 'æ',
291
      'ç'       => 'ç',
292
      'ì'       => 'ì',
293
      'í'       => 'í',
294
      'î'       => 'î',
295
      'ï'       => 'ï',
296
      'ð'       => 'ð',
297
      'ñ'       => 'ñ',
298
      'õ'       => 'õ',
299
      'ø'       => 'ø',
300
      'ý'       => 'ý',
301
      'ÿ'       => 'ÿ',
302
      '€'      => '€',
303
  );
304
305
  /**
306
   * @var array
307
   */
308
  protected static $utf8ToWin1252 = array(
309
      "\xe2\x82\xac" => "\x80", // EURO SIGN
310
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
311
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
312
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
313
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
314
      "\xe2\x80\xa0" => "\x86", // DAGGER
315
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
316
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
317
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
318
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
319
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
320
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
321
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
322
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
323
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
324
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
325
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
326
      "\xe2\x80\xa2" => "\x95", // BULLET
327
      "\xe2\x80\x93" => "\x96", // EN DASH
328
      "\xe2\x80\x94" => "\x97", // EM DASH
329
      "\xcb\x9c"     => "\x98", // SMALL TILDE
330
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
331
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
332
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
333
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
334
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
335 1
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
336
  );
337 1
338 1
  /**
339
   * @var array
340
   */
341
  protected static $utf8MSWord = array(
342
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
343 151
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
344
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
345 151
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
346
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
347 1
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
348 1
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
349 1
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
350 1
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
351
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
352 1
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
353 1
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
354 1
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
355 1
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
356 151
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
357
  );
358
359
  /**
360
   * @var array
361
   */
362
  private static $support = array();
363 2
364
  /**
365 2
   * Runs the support check (checkForSupport()) when an instance is created.
366
   */
367 2
  public function __construct()
  {
    // Creating an instance triggers the class-level support check; what that
    // check records is defined in checkForSupport(), not here.
    self::checkForSupport();
  }
371 2
372
  /**
373
   * Returns a single UTF-8 character from string.
374
   *
375
   * @param    string $str A UTF-8 string.
376
   * @param    int    $pos The position of character to return.
377
   *
378
   * @return   string Single Multi-Byte character.
379 1
   */
380
  public static function access($str, $pos)
  {
    // Delegate to this class's own substr() with a fixed length of 1, so the
    // call acts like $str[$pos] for whatever string handling substr() provides.
    return self::substr($str, $pos, 1);
  }
386
387
  /**
388
   * Prepends BOM character to the string and returns the whole string.
389 1
   *
390
   * INFO: If the string already starts with a BOM, it is returned unchanged.
391 1
   *
392
   * @param    string $str The input string
393
   *
394
   * @return   string The output string that contains BOM
395
   */
396
  public static function add_bom_to_string($str)
  {
    // Only the first three bytes are inspected: that is the length of the
    // mark returned by bom().
    if (self::is_bom(substr($str, 0, 3))) {
      // A mark is already present, so hand the input back untouched.
      return $str;
    }

    return self::bom() . $str;
  }
404
405
  /**
406
   * Returns the Byte Order Mark Character.
407
   *
408
   * @return   string Byte Order Mark
409
   */
410
  public static function bom()
  {
    // EF BB BF is the UTF-8 encoding of U+FEFF, the byte order mark.
    return "\xEF\xBB\xBF";
  }
414
415 6
  /**
416
   * Alias of UTF8::chr_map().
417
   *
418
   * @param $callback
419
   * @param $str
420
   *
421
   * @return array
422
   */
423
  public static function callback($callback, $str)
  {
    // Pure alias: both arguments are forwarded, in the same order, to
    // chr_map(), and its result is returned as-is.
    return self::chr_map($callback, $str);
  }
427
428 7
  /**
429
   * Returns an array of all lower and upper case UTF-8 encoded characters.
430 7
   *
431
   * @return   array An array with lower case chars as keys and upper chars as values.
432 7
   */
433 2
  protected static function case_table()
434
  {
435
    static $case = array(
436 6
437
      // lower => upper
438 6
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
439 3
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
440
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
441 3
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
442
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
443 3
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
444
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
445
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
446 3
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
447
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
448 3
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
449 3
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
450
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
451
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
452 3
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
453 3
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
454 3
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
455
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
456
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
457
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
458
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
459
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
460
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
461
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
462
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
463
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
464
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
465
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
466 3
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
467
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
468 1
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
469 1
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
470 1
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
471
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
472 1
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
473 1
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
474 1
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
475 1
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
476
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
477 1
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
478
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
479
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
480 1
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
481
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
482
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
483 1
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
484
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
485 3
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
486 1
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
487 1
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
488
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
489 3
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
490 3
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
491
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
492 3
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
493 3
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
494
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
495 6
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
496
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
497
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
498
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
499
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
500
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
501
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
502
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
503
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
504
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
505
      "\xea\x9e\x87"     => "\xea\x9e\x86",
506
      "\xea\x9e\x85"     => "\xea\x9e\x84",
507
      "\xea\x9e\x83"     => "\xea\x9e\x82",
508
      "\xea\x9e\x81"     => "\xea\x9e\x80",
509 24
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
510
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
511
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
512
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
513
      "\xea\x9d\xad"     => "\xea\x9d\xac",
514
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
515
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
516
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
517
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
518
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
519
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
520
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
521
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
522
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
523
      "\xea\x9d\x99"     => "\xea\x9d\x98",
524
      "\xea\x9d\x97"     => "\xea\x9d\x96",
525 24
      "\xea\x9d\x95"     => "\xea\x9d\x94",
526 24
      "\xea\x9d\x93"     => "\xea\x9d\x92",
527
      "\xea\x9d\x91"     => "\xea\x9d\x90",
528 24
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
529 24
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
530
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
531 24
      "\xea\x9d\x89"     => "\xea\x9d\x88",
532 7
      "\xea\x9d\x87"     => "\xea\x9d\x86",
533 7
      "\xea\x9d\x85"     => "\xea\x9d\x84",
534
      "\xea\x9d\x83"     => "\xea\x9d\x82",
535 24
      "\xea\x9d\x81"     => "\xea\x9d\x80",
536 1
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
537 1
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
538
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
539 24
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
540 6
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
541 6
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
542
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
543 24
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
544
      "\xea\x9c\xad"     => "\xea\x9c\xac",
545
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
546
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
547
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
548
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
549
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
550
      "\xea\x9a\x97"     => "\xea\x9a\x96",
551
      "\xea\x9a\x95"     => "\xea\x9a\x94",
552
      "\xea\x9a\x93"     => "\xea\x9a\x92",
553
      "\xea\x9a\x91"     => "\xea\x9a\x90",
554 25
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
555
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
556 25
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
557
      "\xea\x9a\x89"     => "\xea\x9a\x88",
558 25
      "\xea\x9a\x87"     => "\xea\x9a\x86",
559 25
      "\xea\x9a\x85"     => "\xea\x9a\x84",
560 25
      "\xea\x9a\x83"     => "\xea\x9a\x82",
561
      "\xea\x9a\x81"     => "\xea\x9a\x80",
562 25
      "\xea\x99\xad"     => "\xea\x99\xac",
563 25
      "\xea\x99\xab"     => "\xea\x99\xaa",
564 25
      "\xea\x99\xa9"     => "\xea\x99\xa8",
565
      "\xea\x99\xa7"     => "\xea\x99\xa6",
566 25
      "\xea\x99\xa5"     => "\xea\x99\xa4",
567
      "\xea\x99\xa3"     => "\xea\x99\xa2",
568
      "\xea\x99\x9f"     => "\xea\x99\x9e",
569
      "\xea\x99\x9d"     => "\xea\x99\x9c",
570
      "\xea\x99\x9b"     => "\xea\x99\x9a",
571
      "\xea\x99\x99"     => "\xea\x99\x98",
572
      "\xea\x99\x97"     => "\xea\x99\x96",
573
      "\xea\x99\x95"     => "\xea\x99\x94",
574
      "\xea\x99\x93"     => "\xea\x99\x92",
575
      "\xea\x99\x91"     => "\xea\x99\x90",
576
      "\xea\x99\x8f"     => "\xea\x99\x8e",
577
      "\xea\x99\x8d"     => "\xea\x99\x8c",
578
      "\xea\x99\x8b"     => "\xea\x99\x8a",
579
      "\xea\x99\x89"     => "\xea\x99\x88",
580
      "\xea\x99\x87"     => "\xea\x99\x86",
581
      "\xea\x99\x85"     => "\xea\x99\x84",
582 25
      "\xea\x99\x83"     => "\xea\x99\x82",
583
      "\xea\x99\x81"     => "\xea\x99\x80",
584
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
585 25
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
586
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
587
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
588
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
589 25
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
590 25
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
591 25
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
592 25
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
593
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
594 25
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
595
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
596
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
597 25
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
598 25
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
599
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
600 25
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
601
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
602
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
603
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
604
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
605
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
606
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
607
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
608
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
609
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
610
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
611 8
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
612
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
613 8
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
614
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
615 8
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
616
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
617 2
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
618
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
619 2
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
620
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
621 1
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
622 1
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
623
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
624 2
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
625 2
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
626
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
627 8
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
628
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
629
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
630
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
631
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
632
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
633
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
634
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
635
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
636
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
637
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
638
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
639 1
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
640
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
641 1
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
642
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
643
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
644
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
645
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
646
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
647
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
648
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
649
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
650
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
651 2
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
652
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
653 2
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
654 2
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
655
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
656 2
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
657 1
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
658 1
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
659 1
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
660
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
661 2
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
662
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
663
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
664
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
665
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
666
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
667
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
668
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
669
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
670
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
671 7
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
672
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
673
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
674
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
675 7
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
676 1
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
677 1
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
678
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
679
      "\xe2\xb1\xa6"     => "\xc8\xbe",
680 7
      "\xe2\xb1\xa5"     => "\xc8\xba",
681 1
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
682 1
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
683
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
684
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
685 7
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
686 2
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
687 2
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
688
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
689
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
690 7
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
691 1
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
692 1
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
693
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
694
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
695 7
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
696 1
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
697 1
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
698
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
699 7
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
700
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
701
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
702
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
703
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
704
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
705
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
706
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
707
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
708
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
709 2
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
710
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
711 2
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
712 2
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
713 2
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
714
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
715
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
716
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
717
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
718
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
719
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
720
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
721
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
722
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
723
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
724
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
725
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
726
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
727
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
728
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
729
      "\xe2\x86\x84"     => "\xe2\x86\x83",
730
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
731
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
732
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
733
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
734
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
735
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
736 8
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
737
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
738 8
      "\xe1\xbe\xbe"     => "\xce\x99",
739
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
740 8
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
741
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
742 8
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
743 2
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
744
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
745
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
746 7
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
747
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
748 7
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
749 7
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
750 7
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
751
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
752 7
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
753
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
754 7
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
755 6
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
756
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
757
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
758 4
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
759
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
760
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
761 4
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
762 4
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
763 4
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
764
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
765 4
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
766 3
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
767
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
768 3
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
769 3
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
770 3
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
771
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
772 3
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
773
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
774
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
775
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
776
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
777
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
778
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
779
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
780
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
781
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
782
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
783
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
784
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
785
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
786
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
787
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
788
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
789
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
790
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
791
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
792
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
793
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
794
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
795 3
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
796
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
797 4
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
798
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
799
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
800
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
801
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
802 4
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
803
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
804
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
805
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
806
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
807 4
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
808 4
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
809 2
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
810 2
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
811
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
812 2
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
813 2
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
814
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
815
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
816 2
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
817
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
818 4
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
819 4
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
820 4
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
821 4
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
822
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
823
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
824 7
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
825
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
826 7
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
827
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
828
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
829
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
830
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
831
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
832
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
833
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
834
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
835
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
836
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
837
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
838
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
839
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
840
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
841
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
842
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
843
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
844
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
845
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
846
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
847
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
848
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
849
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
850
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
851
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
852
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
853
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
854
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
855
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
856
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
857
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
858
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
859
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
860
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
861
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
862
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
863
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
864
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
865
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
866
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
867
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
868
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
869
      "\xe1\xba\xad"     => "\xe1\xba\xac",
870
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
871
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
872
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
873
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
874
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
875
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
876
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
877
      "\xe1\xba\x95"     => "\xe1\xba\x94",
878
      "\xe1\xba\x93"     => "\xe1\xba\x92",
879
      "\xe1\xba\x91"     => "\xe1\xba\x90",
880
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
881
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
882
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
883
      "\xe1\xba\x89"     => "\xe1\xba\x88",
884
      "\xe1\xba\x87"     => "\xe1\xba\x86",
885
      "\xe1\xba\x85"     => "\xe1\xba\x84",
886
      "\xe1\xba\x83"     => "\xe1\xba\x82",
887
      "\xe1\xba\x81"     => "\xe1\xba\x80",
888
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
889
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
890
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
891
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
892
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
893
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
894
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
895
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
896
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
897
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
898
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
899
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
900
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
901
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
902
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
903
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
904
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
905
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
906
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
907
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
908
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
909
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
910
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
911
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
912
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
913
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
914
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
915
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
916
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
917
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
918
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
919
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
920
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
921
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
922
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
923
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
924
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
925
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
926
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
927
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
928
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
929
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
930
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
931
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
932
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
933
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
934
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
935
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
936
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
937
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
938
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
939
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
940
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
941
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
942
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
943
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
944 2
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
945
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
946 2
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
947
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
948
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
949
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
950
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
951
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
952
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
953
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
954
      "\xd6\x86"         => "\xd5\x96",
955
      "\xd6\x85"         => "\xd5\x95",
956
      "\xd6\x84"         => "\xd5\x94",
957
      "\xd6\x83"         => "\xd5\x93",
958
      "\xd6\x82"         => "\xd5\x92",
959
      "\xd6\x81"         => "\xd5\x91",
960
      "\xd6\x80"         => "\xd5\x90",
961
      "\xd5\xbf"         => "\xd5\x8f",
962
      "\xd5\xbe"         => "\xd5\x8e",
963
      "\xd5\xbd"         => "\xd5\x8d",
964
      "\xd5\xbc"         => "\xd5\x8c",
965
      "\xd5\xbb"         => "\xd5\x8b",
966
      "\xd5\xba"         => "\xd5\x8a",
967
      "\xd5\xb9"         => "\xd5\x89",
968
      "\xd5\xb8"         => "\xd5\x88",
969
      "\xd5\xb7"         => "\xd5\x87",
970
      "\xd5\xb6"         => "\xd5\x86",
971
      "\xd5\xb5"         => "\xd5\x85",
972
      "\xd5\xb4"         => "\xd5\x84",
973
      "\xd5\xb3"         => "\xd5\x83",
974
      "\xd5\xb2"         => "\xd5\x82",
975
      "\xd5\xb1"         => "\xd5\x81",
976
      "\xd5\xb0"         => "\xd5\x80",
977
      "\xd5\xaf"         => "\xd4\xbf",
978
      "\xd5\xae"         => "\xd4\xbe",
979
      "\xd5\xad"         => "\xd4\xbd",
980
      "\xd5\xac"         => "\xd4\xbc",
981
      "\xd5\xab"         => "\xd4\xbb",
982
      "\xd5\xaa"         => "\xd4\xba",
983
      "\xd5\xa9"         => "\xd4\xb9",
984
      "\xd5\xa8"         => "\xd4\xb8",
985
      "\xd5\xa7"         => "\xd4\xb7",
986
      "\xd5\xa6"         => "\xd4\xb6",
987
      "\xd5\xa5"         => "\xd4\xb5",
988
      "\xd5\xa4"         => "\xd4\xb4",
989
      "\xd5\xa3"         => "\xd4\xb3",
990
      "\xd5\xa2"         => "\xd4\xb2",
991
      "\xd5\xa1"         => "\xd4\xb1",
992
      "\xd4\xa5"         => "\xd4\xa4",
993
      "\xd4\xa3"         => "\xd4\xa2",
994
      "\xd4\xa1"         => "\xd4\xa0",
995
      "\xd4\x9f"         => "\xd4\x9e",
996
      "\xd4\x9d"         => "\xd4\x9c",
997
      "\xd4\x9b"         => "\xd4\x9a",
998
      "\xd4\x99"         => "\xd4\x98",
999
      "\xd4\x97"         => "\xd4\x96",
1000
      "\xd4\x95"         => "\xd4\x94",
1001
      "\xd4\x93"         => "\xd4\x92",
1002
      "\xd4\x91"         => "\xd4\x90",
1003
      "\xd4\x8f"         => "\xd4\x8e",
1004
      "\xd4\x8d"         => "\xd4\x8c",
1005
      "\xd4\x8b"         => "\xd4\x8a",
1006
      "\xd4\x89"         => "\xd4\x88",
1007
      "\xd4\x87"         => "\xd4\x86",
1008
      "\xd4\x85"         => "\xd4\x84",
1009
      "\xd4\x83"         => "\xd4\x82",
1010
      "\xd4\x81"         => "\xd4\x80",
1011
      "\xd3\xbf"         => "\xd3\xbe",
1012
      "\xd3\xbd"         => "\xd3\xbc",
1013
      "\xd3\xbb"         => "\xd3\xba",
1014
      "\xd3\xb9"         => "\xd3\xb8",
1015
      "\xd3\xb7"         => "\xd3\xb6",
1016
      "\xd3\xb5"         => "\xd3\xb4",
1017
      "\xd3\xb3"         => "\xd3\xb2",
1018
      "\xd3\xb1"         => "\xd3\xb0",
1019
      "\xd3\xaf"         => "\xd3\xae",
1020
      "\xd3\xad"         => "\xd3\xac",
1021
      "\xd3\xab"         => "\xd3\xaa",
1022
      "\xd3\xa9"         => "\xd3\xa8",
1023
      "\xd3\xa7"         => "\xd3\xa6",
1024
      "\xd3\xa5"         => "\xd3\xa4",
1025
      "\xd3\xa3"         => "\xd3\xa2",
1026
      "\xd3\xa1"         => "\xd3\xa0",
1027
      "\xd3\x9f"         => "\xd3\x9e",
1028
      "\xd3\x9d"         => "\xd3\x9c",
1029
      "\xd3\x9b"         => "\xd3\x9a",
1030
      "\xd3\x99"         => "\xd3\x98",
1031
      "\xd3\x97"         => "\xd3\x96",
1032
      "\xd3\x95"         => "\xd3\x94",
1033
      "\xd3\x93"         => "\xd3\x92",
1034
      "\xd3\x91"         => "\xd3\x90",
1035
      "\xd3\x8f"         => "\xd3\x80",
1036
      "\xd3\x8e"         => "\xd3\x8d",
1037
      "\xd3\x8c"         => "\xd3\x8b",
1038
      "\xd3\x8a"         => "\xd3\x89",
1039
      "\xd3\x88"         => "\xd3\x87",
1040
      "\xd3\x86"         => "\xd3\x85",
1041
      "\xd3\x84"         => "\xd3\x83",
1042
      "\xd3\x82"         => "\xd3\x81",
1043
      "\xd2\xbf"         => "\xd2\xbe",
1044
      "\xd2\xbd"         => "\xd2\xbc",
1045
      "\xd2\xbb"         => "\xd2\xba",
1046
      "\xd2\xb9"         => "\xd2\xb8",
1047
      "\xd2\xb7"         => "\xd2\xb6",
1048
      "\xd2\xb5"         => "\xd2\xb4",
1049
      "\xd2\xb3"         => "\xd2\xb2",
1050
      "\xd2\xb1"         => "\xd2\xb0",
1051
      "\xd2\xaf"         => "\xd2\xae",
1052
      "\xd2\xad"         => "\xd2\xac",
1053
      "\xd2\xab"         => "\xd2\xaa",
1054
      "\xd2\xa9"         => "\xd2\xa8",
1055
      "\xd2\xa7"         => "\xd2\xa6",
1056
      "\xd2\xa5"         => "\xd2\xa4",
1057
      "\xd2\xa3"         => "\xd2\xa2",
1058 1
      "\xd2\xa1"         => "\xd2\xa0",
1059
      "\xd2\x9f"         => "\xd2\x9e",
1060 1
      "\xd2\x9d"         => "\xd2\x9c",
1061
      "\xd2\x9b"         => "\xd2\x9a",
1062
      "\xd2\x99"         => "\xd2\x98",
1063
      "\xd2\x97"         => "\xd2\x96",
1064
      "\xd2\x95"         => "\xd2\x94",
1065
      "\xd2\x93"         => "\xd2\x92",
1066
      "\xd2\x91"         => "\xd2\x90",
1067
      "\xd2\x8f"         => "\xd2\x8e",
1068
      "\xd2\x8d"         => "\xd2\x8c",
1069
      "\xd2\x8b"         => "\xd2\x8a",
1070 16
      "\xd2\x81"         => "\xd2\x80",
1071
      "\xd1\xbf"         => "\xd1\xbe",
1072 16
      "\xd1\xbd"         => "\xd1\xbc",
1073
      "\xd1\xbb"         => "\xd1\xba",
1074
      "\xd1\xb9"         => "\xd1\xb8",
1075
      "\xd1\xb7"         => "\xd1\xb6",
1076
      "\xd1\xb5"         => "\xd1\xb4",
1077
      "\xd1\xb3"         => "\xd1\xb2",
1078
      "\xd1\xb1"         => "\xd1\xb0",
1079
      "\xd1\xaf"         => "\xd1\xae",
1080
      "\xd1\xad"         => "\xd1\xac",
1081
      "\xd1\xab"         => "\xd1\xaa",
1082
      "\xd1\xa9"         => "\xd1\xa8",
1083
      "\xd1\xa7"         => "\xd1\xa6",
1084
      "\xd1\xa5"         => "\xd1\xa4",
1085
      "\xd1\xa3"         => "\xd1\xa2",
1086
      "\xd1\xa1"         => "\xd1\xa0",
1087 31
      "\xd1\x9f"         => "\xd0\x8f",
1088
      "\xd1\x9e"         => "\xd0\x8e",
1089 31
      "\xd1\x9d"         => "\xd0\x8d",
1090
      "\xd1\x9c"         => "\xd0\x8c",
1091 31
      "\xd1\x9b"         => "\xd0\x8b",
1092 3
      "\xd1\x9a"         => "\xd0\x8a",
1093
      "\xd1\x99"         => "\xd0\x89",
1094
      "\xd1\x98"         => "\xd0\x88",
1095 29
      "\xd1\x97"         => "\xd0\x87",
1096
      "\xd1\x96"         => "\xd0\x86",
1097
      "\xd1\x95"         => "\xd0\x85",
1098
      "\xd1\x94"         => "\xd0\x84",
1099
      "\xd1\x93"         => "\xd0\x83",
1100
      "\xd1\x92"         => "\xd0\x82",
1101
      "\xd1\x91"         => "\xd0\x81",
1102 29
      "\xd1\x90"         => "\xd0\x80",
1103
      "\xd1\x8f"         => "\xd0\xaf",
1104 29
      "\xd1\x8e"         => "\xd0\xae",
1105 29
      "\xd1\x8d"         => "\xd0\xad",
1106 29
      "\xd1\x8c"         => "\xd0\xac",
1107 29
      "\xd1\x8b"         => "\xd0\xab",
1108 29
      "\xd1\x8a"         => "\xd0\xaa",
1109 29
      "\xd1\x89"         => "\xd0\xa9",
1110
      "\xd1\x88"         => "\xd0\xa8",
1111
      "\xd1\x87"         => "\xd0\xa7",
1112 29
      "\xd1\x86"         => "\xd0\xa6",
1113
      "\xd1\x85"         => "\xd0\xa5",
1114 27
      "\xd1\x84"         => "\xd0\xa4",
1115 29
      "\xd1\x83"         => "\xd0\xa3",
1116
      "\xd1\x82"         => "\xd0\xa2",
1117 25
      "\xd1\x81"         => "\xd0\xa1",
1118 25
      "\xd1\x80"         => "\xd0\xa0",
1119 25
      "\xd0\xbf"         => "\xd0\x9f",
1120 25
      "\xd0\xbe"         => "\xd0\x9e",
1121 27
      "\xd0\xbd"         => "\xd0\x9d",
1122
      "\xd0\xbc"         => "\xd0\x9c",
1123 11
      "\xd0\xbb"         => "\xd0\x9b",
1124 11
      "\xd0\xba"         => "\xd0\x9a",
1125 11
      "\xd0\xb9"         => "\xd0\x99",
1126 11
      "\xd0\xb8"         => "\xd0\x98",
1127 21
      "\xd0\xb7"         => "\xd0\x97",
1128
      "\xd0\xb6"         => "\xd0\x96",
1129 5
      "\xd0\xb5"         => "\xd0\x95",
1130 5
      "\xd0\xb4"         => "\xd0\x94",
1131 5
      "\xd0\xb3"         => "\xd0\x93",
1132 5
      "\xd0\xb2"         => "\xd0\x92",
1133 11
      "\xd0\xb1"         => "\xd0\x91",
1134
      "\xd0\xb0"         => "\xd0\x90",
1135
      "\xcf\xbb"         => "\xcf\xba",
1136
      "\xcf\xb8"         => "\xcf\xb7",
1137
      "\xcf\xb5"         => "\xce\x95",
1138
      "\xcf\xb2"         => "\xcf\xb9",
1139
      "\xcf\xb1"         => "\xce\xa1",
1140
      "\xcf\xb0"         => "\xce\x9a",
1141
      "\xcf\xaf"         => "\xcf\xae",
1142 3
      "\xcf\xad"         => "\xcf\xac",
1143 3
      "\xcf\xab"         => "\xcf\xaa",
1144 3
      "\xcf\xa9"         => "\xcf\xa8",
1145 3
      "\xcf\xa7"         => "\xcf\xa6",
1146 7
      "\xcf\xa5"         => "\xcf\xa4",
1147
      "\xcf\xa3"         => "\xcf\xa2",
1148 3
      "\xcf\xa1"         => "\xcf\xa0",
1149 3
      "\xcf\x9f"         => "\xcf\x9e",
1150 3
      "\xcf\x9d"         => "\xcf\x9c",
1151 3
      "\xcf\x9b"         => "\xcf\x9a",
1152 3
      "\xcf\x99"         => "\xcf\x98",
1153
      "\xcf\x97"         => "\xcf\x8f",
1154
      "\xcf\x96"         => "\xce\xa0",
1155
      "\xcf\x95"         => "\xce\xa6",
1156 3
      "\xcf\x91"         => "\xce\x98",
1157
      "\xcf\x90"         => "\xce\x92",
1158 29
      "\xcf\x8e"         => "\xce\x8f",
1159
      "\xcf\x8d"         => "\xce\x8e",
1160
      "\xcf\x8c"         => "\xce\x8c",
1161 27
      "\xcf\x8b"         => "\xce\xab",
1162
      "\xcf\x8a"         => "\xce\xaa",
1163 25
      "\xcf\x89"         => "\xce\xa9",
1164 25
      "\xcf\x88"         => "\xce\xa8",
1165 25
      "\xcf\x87"         => "\xce\xa7",
1166 25
      "\xcf\x86"         => "\xce\xa6",
1167
      "\xcf\x85"         => "\xce\xa5",
1168
      "\xcf\x84"         => "\xce\xa4",
1169
      "\xcf\x83"         => "\xce\xa3",
1170
      "\xcf\x82"         => "\xce\xa3",
1171 25
      "\xcf\x81"         => "\xce\xa1",
1172
      "\xcf\x80"         => "\xce\xa0",
1173
      "\xce\xbf"         => "\xce\x9f",
1174
      "\xce\xbe"         => "\xce\x9e",
1175
      "\xce\xbd"         => "\xce\x9d",
1176
      "\xce\xbc"         => "\xce\x9c",
1177 25
      "\xce\xbb"         => "\xce\x9b",
1178 25
      "\xce\xba"         => "\xce\x9a",
1179 25
      "\xce\xb9"         => "\xce\x99",
1180 25
      "\xce\xb8"         => "\xce\x98",
1181
      "\xce\xb7"         => "\xce\x97",
1182 25
      "\xce\xb6"         => "\xce\x96",
1183
      "\xce\xb5"         => "\xce\x95",
1184 25
      "\xce\xb4"         => "\xce\x94",
1185 25
      "\xce\xb3"         => "\xce\x93",
1186 5
      "\xce\xb2"         => "\xce\x92",
1187
      "\xce\xb1"         => "\xce\x91",
1188
      "\xce\xaf"         => "\xce\x8a",
1189 25
      "\xce\xae"         => "\xce\x89",
1190 25
      "\xce\xad"         => "\xce\x88",
1191 25
      "\xce\xac"         => "\xce\x86",
1192 25
      "\xcd\xbd"         => "\xcf\xbf",
1193 25
      "\xcd\xbc"         => "\xcf\xbe",
1194
      "\xcd\xbb"         => "\xcf\xbd",
1195
      "\xcd\xb7"         => "\xcd\xb6",
1196
      "\xcd\xb3"         => "\xcd\xb2",
1197
      "\xcd\xb1"         => "\xcd\xb0",
1198 13
      "\xca\x92"         => "\xc6\xb7",
1199
      "\xca\x8c"         => "\xc9\x85",
1200
      "\xca\x8b"         => "\xc6\xb2",
1201 29
      "\xca\x8a"         => "\xc6\xb1",
1202
      "\xca\x89"         => "\xc9\x84",
1203 11
      "\xca\x88"         => "\xc6\xae",
1204
      "\xca\x83"         => "\xc6\xa9",
1205
      "\xca\x80"         => "\xc6\xa6",
1206
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1207
      "\xc9\xb5"         => "\xc6\x9f",
1208
      "\xc9\xb2"         => "\xc6\x9d",
1209
      "\xc9\xb1"         => "\xe2\xb1\xae",
1210
      "\xc9\xaf"         => "\xc6\x9c",
1211
      "\xc9\xab"         => "\xe2\xb1\xa2",
1212
      "\xc9\xa9"         => "\xc6\x96",
1213
      "\xc9\xa8"         => "\xc6\x97",
1214
      "\xc9\xa5"         => "\xea\x9e\x8d",
1215
      "\xc9\xa3"         => "\xc6\x94",
1216
      "\xc9\xa0"         => "\xc6\x93",
1217
      "\xc9\x9b"         => "\xc6\x90",
1218 6
      "\xc9\x99"         => "\xc6\x8f",
1219
      "\xc9\x97"         => "\xc6\x8a",
1220 6
      "\xc9\x96"         => "\xc6\x89",
1221
      "\xc9\x94"         => "\xc6\x86",
1222
      "\xc9\x93"         => "\xc6\x81",
1223
      "\xc9\x92"         => "\xe2\xb1\xb0",
1224 6
      "\xc9\x91"         => "\xe2\xb1\xad",
1225
      "\xc9\x90"         => "\xe2\xb1\xaf",
1226
      "\xc9\x8f"         => "\xc9\x8e",
1227
      "\xc9\x8d"         => "\xc9\x8c",
1228
      "\xc9\x8b"         => "\xc9\x8a",
1229
      "\xc9\x89"         => "\xc9\x88",
1230
      "\xc9\x87"         => "\xc9\x86",
1231
      "\xc9\x82"         => "\xc9\x81",
1232
      "\xc9\x80"         => "\xe2\xb1\xbf",
1233
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1234
      "\xc8\xbc"         => "\xc8\xbb",
1235
      "\xc8\xb3"         => "\xc8\xb2",
1236
      "\xc8\xb1"         => "\xc8\xb0",
1237
      "\xc8\xaf"         => "\xc8\xae",
1238
      "\xc8\xad"         => "\xc8\xac",
1239
      "\xc8\xab"         => "\xc8\xaa",
1240
      "\xc8\xa9"         => "\xc8\xa8",
1241
      "\xc8\xa7"         => "\xc8\xa6",
1242
      "\xc8\xa5"         => "\xc8\xa4",
1243
      "\xc8\xa3"         => "\xc8\xa2",
1244
      "\xc8\x9f"         => "\xc8\x9e",
1245
      "\xc8\x9d"         => "\xc8\x9c",
1246
      "\xc8\x9b"         => "\xc8\x9a",
1247 37
      "\xc8\x99"         => "\xc8\x98",
1248
      "\xc8\x97"         => "\xc8\x96",
1249 37
      "\xc8\x95"         => "\xc8\x94",
1250
      "\xc8\x93"         => "\xc8\x92",
1251 37
      "\xc8\x91"         => "\xc8\x90",
1252
      "\xc8\x8f"         => "\xc8\x8e",
1253 37
      "\xc8\x8d"         => "\xc8\x8c",
1254 9
      "\xc8\x8b"         => "\xc8\x8a",
1255
      "\xc8\x89"         => "\xc8\x88",
1256
      "\xc8\x87"         => "\xc8\x86",
1257
      "\xc8\x85"         => "\xc8\x84",
1258 35
      "\xc8\x83"         => "\xc8\x82",
1259
      "\xc8\x81"         => "\xc8\x80",
1260 35
      "\xc7\xbf"         => "\xc7\xbe",
1261
      "\xc7\xbd"         => "\xc7\xbc",
1262
      "\xc7\xbb"         => "\xc7\xba",
1263
      "\xc7\xb9"         => "\xc7\xb8",
1264 1
      "\xc7\xb5"         => "\xc7\xb4",
1265 1
      "\xc7\xb3"         => "\xc7\xb2",
1266
      "\xc7\xaf"         => "\xc7\xae",
1267 35
      "\xc7\xad"         => "\xc7\xac",
1268 21
      "\xc7\xab"         => "\xc7\xaa",
1269 21
      "\xc7\xa9"         => "\xc7\xa8",
1270 31
      "\xc7\xa7"         => "\xc7\xa6",
1271
      "\xc7\xa5"         => "\xc7\xa4",
1272
      "\xc7\xa3"         => "\xc7\xa2",
1273 35
      "\xc7\xa1"         => "\xc7\xa0",
1274
      "\xc7\x9f"         => "\xc7\x9e",
1275
      "\xc7\x9d"         => "\xc6\x8e",
1276 35
      "\xc7\x9c"         => "\xc7\x9b",
1277 1
      "\xc7\x9a"         => "\xc7\x99",
1278 1
      "\xc7\x98"         => "\xc7\x97",
1279
      "\xc7\x96"         => "\xc7\x95",
1280 35
      "\xc7\x94"         => "\xc7\x93",
1281
      "\xc7\x92"         => "\xc7\x91",
1282
      "\xc7\x90"         => "\xc7\x8f",
1283
      "\xc7\x8e"         => "\xc7\x8d",
1284
      "\xc7\x8c"         => "\xc7\x8b",
1285
      "\xc7\x89"         => "\xc7\x88",
1286
      "\xc7\x86"         => "\xc7\x85",
1287
      "\xc6\xbf"         => "\xc7\xb7",
1288
      "\xc6\xbd"         => "\xc6\xbc",
1289
      "\xc6\xb9"         => "\xc6\xb8",
1290
      "\xc6\xb6"         => "\xc6\xb5",
1291
      "\xc6\xb4"         => "\xc6\xb3",
1292
      "\xc6\xb0"         => "\xc6\xaf",
1293
      "\xc6\xad"         => "\xc6\xac",
1294
      "\xc6\xa8"         => "\xc6\xa7",
1295
      "\xc6\xa5"         => "\xc6\xa4",
1296
      "\xc6\xa3"         => "\xc6\xa2",
1297
      "\xc6\xa1"         => "\xc6\xa0",
1298
      "\xc6\x9e"         => "\xc8\xa0",
1299
      "\xc6\x9a"         => "\xc8\xbd",
1300
      "\xc6\x99"         => "\xc6\x98",
1301
      "\xc6\x95"         => "\xc7\xb6",
1302
      "\xc6\x92"         => "\xc6\x91",
1303
      "\xc6\x8c"         => "\xc6\x8b",
1304
      "\xc6\x88"         => "\xc6\x87",
1305
      "\xc6\x85"         => "\xc6\x84",
1306
      "\xc6\x83"         => "\xc6\x82",
1307
      "\xc6\x80"         => "\xc9\x83",
1308
      "\xc5\xbf"         => "\x53",
1309
      "\xc5\xbe"         => "\xc5\xbd",
1310
      "\xc5\xbc"         => "\xc5\xbb",
1311
      "\xc5\xba"         => "\xc5\xb9",
1312
      "\xc5\xb7"         => "\xc5\xb6",
1313
      "\xc5\xb5"         => "\xc5\xb4",
1314
      "\xc5\xb3"         => "\xc5\xb2",
1315
      "\xc5\xb1"         => "\xc5\xb0",
1316
      "\xc5\xaf"         => "\xc5\xae",
1317
      "\xc5\xad"         => "\xc5\xac",
1318
      "\xc5\xab"         => "\xc5\xaa",
1319 63
      "\xc5\xa9"         => "\xc5\xa8",
1320
      "\xc5\xa7"         => "\xc5\xa6",
1321 63
      "\xc5\xa5"         => "\xc5\xa4",
1322
      "\xc5\xa3"         => "\xc5\xa2",
1323 63
      "\xc5\xa1"         => "\xc5\xa0",
1324 4
      "\xc5\x9f"         => "\xc5\x9e",
1325
      "\xc5\x9d"         => "\xc5\x9c",
1326
      "\xc5\x9b"         => "\xc5\x9a",
1327
      "\xc5\x99"         => "\xc5\x98",
1328 62
      "\xc5\x97"         => "\xc5\x96",
1329
      "\xc5\x95"         => "\xc5\x94",
1330
      "\xc5\x93"         => "\xc5\x92",
1331 62
      "\xc5\x91"         => "\xc5\x90",
1332
      "\xc5\x8f"         => "\xc5\x8e",
1333
      "\xc5\x8d"         => "\xc5\x8c",
1334
      "\xc5\x8b"         => "\xc5\x8a",
1335 62
      "\xc5\x88"         => "\xc5\x87",
1336
      "\xc5\x86"         => "\xc5\x85",
1337
      "\xc5\x84"         => "\xc5\x83",
1338 62
      "\xc5\x82"         => "\xc5\x81",
1339
      "\xc5\x80"         => "\xc4\xbf",
1340
      "\xc4\xbe"         => "\xc4\xbd",
1341 62
      "\xc4\xbc"         => "\xc4\xbb",
1342
      "\xc4\xba"         => "\xc4\xb9",
1343
      "\xc4\xb7"         => "\xc4\xb6",
1344
      "\xc4\xb5"         => "\xc4\xb4",
1345
      "\xc4\xb3"         => "\xc4\xb2",
1346
      "\xc4\xb1"         => "\x49",
1347
      "\xc4\xaf"         => "\xc4\xae",
1348
      "\xc4\xad"         => "\xc4\xac",
1349
      "\xc4\xab"         => "\xc4\xaa",
1350
      "\xc4\xa9"         => "\xc4\xa8",
1351
      "\xc4\xa7"         => "\xc4\xa6",
1352
      "\xc4\xa5"         => "\xc4\xa4",
1353 24
      "\xc4\xa3"         => "\xc4\xa2",
1354
      "\xc4\xa1"         => "\xc4\xa0",
1355 24
      "\xc4\x9f"         => "\xc4\x9e",
1356
      "\xc4\x9d"         => "\xc4\x9c",
1357 24
      "\xc4\x9b"         => "\xc4\x9a",
1358 5
      "\xc4\x99"         => "\xc4\x98",
1359
      "\xc4\x97"         => "\xc4\x96",
1360
      "\xc4\x95"         => "\xc4\x94",
1361
      "\xc4\x93"         => "\xc4\x92",
1362 23
      "\xc4\x91"         => "\xc4\x90",
1363 23
      "\xc4\x8f"         => "\xc4\x8e",
1364 23
      "\xc4\x8d"         => "\xc4\x8c",
1365
      "\xc4\x8b"         => "\xc4\x8a",
1366 23
      "\xc4\x89"         => "\xc4\x88",
1367
      "\xc4\x87"         => "\xc4\x86",
1368 23
      "\xc4\x85"         => "\xc4\x84",
1369
      "\xc4\x83"         => "\xc4\x82",
1370
      "\xc4\x81"         => "\xc4\x80",
1371
      "\xc3\xbf"         => "\xc5\xb8",
1372 23
      "\xc3\xbe"         => "\xc3\x9e",
1373 23
      "\xc3\xbd"         => "\xc3\x9d",
1374 23
      "\xc3\xbc"         => "\xc3\x9c",
1375 23
      "\xc3\xbb"         => "\xc3\x9b",
1376 23
      "\xc3\xba"         => "\xc3\x9a",
1377
      "\xc3\xb9"         => "\xc3\x99",
1378 23
      "\xc3\xb8"         => "\xc3\x98",
1379
      "\xc3\xb6"         => "\xc3\x96",
1380
      "\xc3\xb5"         => "\xc3\x95",
1381
      "\xc3\xb4"         => "\xc3\x94",
1382
      "\xc3\xb3"         => "\xc3\x93",
1383
      "\xc3\xb2"         => "\xc3\x92",
1384
      "\xc3\xb1"         => "\xc3\x91",
1385
      "\xc3\xb0"         => "\xc3\x90",
1386
      "\xc3\xaf"         => "\xc3\x8f",
1387
      "\xc3\xae"         => "\xc3\x8e",
1388
      "\xc3\xad"         => "\xc3\x8d",
1389
      "\xc3\xac"         => "\xc3\x8c",
1390
      "\xc3\xab"         => "\xc3\x8b",
1391
      "\xc3\xaa"         => "\xc3\x8a",
1392
      "\xc3\xa9"         => "\xc3\x89",
1393
      "\xc3\xa8"         => "\xc3\x88",
1394
      "\xc3\xa7"         => "\xc3\x87",
1395
      "\xc3\xa6"         => "\xc3\x86",
1396
      "\xc3\xa5"         => "\xc3\x85",
1397
      "\xc3\xa4"         => "\xc3\x84",
1398
      "\xc3\xa3"         => "\xc3\x83",
1399
      "\xc3\xa2"         => "\xc3\x82",
1400
      "\xc3\xa1"         => "\xc3\x81",
1401
      "\xc3\xa0"         => "\xc3\x80",
1402
      "\xc2\xb5"         => "\xce\x9c",
1403
      "\x7a"             => "\x5a",
1404
      "\x79"             => "\x59",
1405
      "\x78"             => "\x58",
1406
      "\x77"             => "\x57",
1407
      "\x76"             => "\x56",
1408
      "\x75"             => "\x55",
1409 23
      "\x74"             => "\x54",
1410 5
      "\x73"             => "\x53",
1411
      "\x72"             => "\x52",
1412 5
      "\x71"             => "\x51",
1413 5
      "\x70"             => "\x50",
1414
      "\x6f"             => "\x4f",
1415 23
      "\x6e"             => "\x4e",
1416
      "\x6d"             => "\x4d",
1417
      "\x6c"             => "\x4c",
1418
      "\x6b"             => "\x4b",
1419 23
      "\x6a"             => "\x4a",
1420
      "\x69"             => "\x49",
1421
      "\x68"             => "\x48",
1422
      "\x67"             => "\x47",
1423
      "\x66"             => "\x46",
1424
      "\x65"             => "\x45",
1425
      "\x64"             => "\x44",
1426
      "\x63"             => "\x43",
1427
      "\x62"             => "\x42",
1428
      "\x61"             => "\x41",
1429
1430 40
    );
1431
1432 40
    return $case;
1433
  }
1434 40
1435
  /**
   * Detect the available UTF-8 related features and cache the result.
   *
   * Fills the "mbstring", "iconv", "intl" and "pcre_utf8" entries of
   * self::$support. Detection is skipped when the "mbstring" entry is
   * already set, so repeated calls do not probe again.
   */
  public static function checkForSupport()
  {
    // the "mbstring" entry doubles as the "already detected" marker
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1448 1
1449 1
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * An integer is used as the code point directly. Any other value is handed
   * to self::hex_to_int() to obtain the integer code point.
   *
   * @param    int|string $code_point The code point for which to generate a character.
   *
   * @return   string Multi-Byte character, returns empty string on failure to encode.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $i = (int)$code_point;

    if ($i !== $code_point) {
      // not a plain integer: try to extract the code point from the given value
      $i = (int)self::hex_to_int($code_point);

      if (!$i) {
        return '';
      }
    }

    // decode the numeric character reference of the code point
    return self::html_entity_decode('&#' . $i . ';', ENT_QUOTES);
  }
1469
1470
  /**
   * Applies a callback to every piece that self::split() produces for a string.
   *
   * @param    callable|string $callback The callback, passed on to array_map() as-is.
   * @param    string          $str      UTF-8 string to run callback on.
   *
   * @return   array The callback results, one entry per piece of the string.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1485
1486 17
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character; an empty array
   *                 for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Test for "no bytes at all" rather than truthiness: the string "0" is
    // falsy in PHP but is a perfectly valid one-character input.
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1506 1
1507 1
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the sequence (1 to 4); the
   * payload bits of the lead byte and of the following continuation bytes are
   * then combined into a single integer.
   *
   * NOTE(review): the input is assumed to hold a complete sequence; a truncated
   * multi-byte sequence still reads past the end of the string - confirm whether
   * callers can pass such input.
   *
   * @param   string $chr The input character
   *
   * @return  int The decoded value, or 0 for an empty string.
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    if (!isset($chr[0])) {
      // nothing to decode; avoids reading the uninitialized offset $chr[0]
      return 0;
    }

    $code = self::ord($chr[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx: append the six payload bits of each continuation byte
      $code = ($code << 6) + (self::ord($chr[$i - 1]) & ~0x80);
    }

    return $code;
  }
1546
1547
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character
   * @param    string $pfix Prefix handed to self::int_to_hex() (default "U+").
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $codePoint = self::ord($chr);

    return self::int_to_hex($codePoint, $pfix);
  }
1559
1560
  /**
   * Splits a string into smaller chunks and joins them with the specified
   * line ending character(s).
   *
   * The chunks come from self::split() and are glued together with implode(),
   * so $end is placed between chunks and not appended after the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1574 1
1575
  /**
   * Accepts a string and removes all non-UTF-8 characters from it.
   *
   * Well-formed one- to four-byte UTF-8 sequences are kept, every other byte is
   * dropped. Afterwards the diamond question mark and invisible characters are
   * removed, followed by the optional normalisation steps.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to strip the BOM via self::removeBOM()
   * @param bool   $normalize_whitespace    set true, to run self::normalize_whitespace()
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used when $normalize_whitespace is true)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings
    //
    // The four-byte alternatives (U+10000 - U+10FFFF) are required: without them
    // valid characters such as emoji fall through to the "anything else" branch
    // and are removed although they are well-formed UTF-8.

    $regx = '/
       (
        (?: [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
        |   [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
        |   \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xE1-\xEC][\x80-\xBF]{2}
        |   \xED[\x80-\x9F][\x80-\xBF]
        |   [\xEE-\xEF][\x80-\xBF]{2}
        |   \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences     11110xxx 10xxxxxx * 3
        |   [\xF1-\xF3][\x80-\xBF]{3}
        |   \xF4[\x80-\x8F][\x80-\xBF]{2}
        ){1,50}                          # ...one or more times
       )
       | .                               # anything else
       /x';
    // keep the captured valid run ($1); a lone invalid byte has no capture and vanishes
    $str = preg_replace($regx, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
1622
1623 2
  /**
   * Cleans up a string so that only printable UTF-8 characters remain.
   *
   * Runs self::fix_simple_utf8() first and then self::clean() with BOM removal
   * and whitespace normalisation enabled (non-breaking spaces are kept, the
   * MS-Word normalisation stays off).
   *
   * @param string|false $str
   *
   * @return string The cleaned string; an empty string for empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    // 1. fix ISO <-> UTF-8 errors
    // 2. remove all non UTF-8 symbols, the diamond question mark (�),
    //    invisible characters (e.g. "\0") and the BOM, and normalize
    //    whitespace chars (but keep non-breaking-spaces)
    $cleaned = self::clean(self::fix_simple_utf8($str), true, true, false, true);

    return (string)$cleaned;
  }
1653
1654 10
  /**
   * Accepts a string and returns an array of Unicode code points.
   *
   * A string is first broken into characters with self::split(); any other
   * argument is mapped as given. Every element is converted with self::ord()
   * and, when $u_style is set, additionally with self::int_to_hex().
   *
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool  $u_style If True, will return code points in U+xxxx format,
   *                          default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $codePoints = array_map(array(__CLASS__, 'ord'), $chars);

    if (!$u_style) {
      return $codePoints;
    }

    return array_map(array(__CLASS__, 'int_to_hex'), $codePoints);
  }
1689 19
1690
  /**
   * Returns count of characters used in a string.
   *
   * Unlike the native count_chars() there is no $mode parameter.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array of Character as keys and
   *           their count as values, sorted by key.
   */
  public static function count_chars($str)
  {
    $chars = self::split($str);
    $counts = array_count_values($chars);

    ksort($counts);

    return $counts;
  }
1706
1707
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * Builds the hexadecimal numeric character reference for the code and lets
   * mb_convert_encoding() translate it from "HTML-ENTITIES" to "UTF-8".
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
1724 1
1725 1
  /**
   * Encode to UTF8 or LATIN1.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function on a string that is already UTF-8 without messing it up.
   *
   * The label is normalised with self::normalizeEncoding() before it is compared.
   *
   * @param string $encodingLabel ISO-8859-1 || UTF-8
   * @param string $str
   *
   * @return false|string Will return false on error (i.e. for any other encoding label).
   */
  public static function encode($encodingLabel, $str)
  {
    $target = self::normalizeEncoding($encodingLabel);

    if ($target === 'ISO-8859-1') {
      return self::to_latin1($str);
    }

    if ($target === 'UTF-8') {
      return self::to_utf8($str);
    }

    // unsupported target encoding
    return false;
  }
1750
1751
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use the UTF-8 option for binary files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string   $filename      Name of the file to read. The value is passed through
   *                                filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   * @param int      $flags         [optional] Forwarded (as bool) to the native function as
   *                                "use_include_path"; FILE_USE_INCLUDE_PATH triggers the
   *                                include path search.
   * @param resource $context       [optional] A valid context resource created with
   *                                stream_context_create(). When it is null and $timeout is
   *                                non-zero, a context carrying the http timeout is created.
   * @param int      $offset        [optional] The offset where the reading starts.
   * @param int      $maxlen        [optional] Maximum length of data read. The default is to
   *                                read until end of file is reached. Only forwarded when it
   *                                is an integer.
   * @param int      $timeout       Timeout for the "http" stream context; only used when no
   *                                $context is given.
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf,
   *                                because they used non default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, so
    // such characters in a file name are altered before the file is opened;
    // the filter is also deprecated in newer PHP versions - confirm whether it
    // is still wanted here.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // Hand the native function real scalars instead of null: the result is the
    // same coercion (null => false / 0) without relying on implicit conversion.
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      $encoding = self::str_detect_encoding($data);

      // strict checks: only convert when a non-empty encoding name other than
      // UTF-8 was detected (detection failure yields false)
      if (is_string($encoding) && $encoding !== '' && $encoding !== 'UTF-8') {
        $data = mb_convert_encoding(
            $data,
            'UTF-8',
            self::normalizeEncoding($encoding)
        );
      }

      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
1880
1881
  /**
   * Checks if a file starts with BOM character.
   *
   * Only the first three bytes of the file are read and handed to self::is_bom().
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (including when the file cannot be read).
   */
  public static function file_has_bom($file_path)
  {
    // Read from offset 0: a negative offset counts from the END of the file
    // since PHP 7.1, so "-1" would return the last byte instead of the head.
    $head = file_get_contents($file_path, false, null, 0, 3);

    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
1892 20
1893 2
  /**
   * Normalizes a value (recursively for arrays and objects) to the requested
   * Unicode normalization form.
   *
   * - arrays / objects: every element / iterable property is filtered recursively
   * - strings: "\r\n" and "\r" become "\n"; strings containing bytes >= 0x80 are
   *   normalized with the Normalizer, falling back to self::encode('UTF-8', ...)
   *   when normalization yields nothing; a leading combining mark gets
   *   $leading_combining prepended
   * - every other type is returned untouched
   *
   * @param mixed  $var
   * @param int    $normalization_form Form passed to Normalizer (default 4).
   * @param string $leading_combining  Prefix used when the result starts with a combining mark.
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    if (!preg_match('/[\x80-\xFF]/', $var)) {
      // pure ASCII: nothing to normalize
      return $var;
    }

    if (Normalizer::isNormalized($var, $normalization_form)) {
      // marker: already normalized, but still eligible for the combining check
      $normalized = '-';
    } else {
      $normalized = Normalizer::normalize($var, $normalization_form);

      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1945 1
1946 6
  /**
   * "filter_input()"-wrapper that passes the result through self::filter().
   *
   * $option is only forwarded to the native function when the caller actually
   * supplied a fourth argument.
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param mixed  $option
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    if (func_num_args() >= 4) {
      $value = filter_input($type, $var, $filter, $option);
    } else {
      $value = filter_input($type, $var, $filter);
    }

    return self::filter($value);
  }
1966
1967
  /**
   * "filter_input_array()"-wrapper that passes the result through self::filter().
   *
   * $definition and $add_empty are only forwarded to the native function when
   * the caller supplied at least two arguments.
   *
   * @param int   $type
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    if (func_num_args() >= 2) {
      $values = filter_input_array($type, $definition, $add_empty);
    } else {
      $values = filter_input_array($type);
    }

    return self::filter($values);
  }
1986 20
1987 20
  /**
   * "filter_var()"-wrapper that passes the result through self::filter().
   *
   * $option is only forwarded to the native function when the caller actually
   * supplied a third argument.
   *
   * @param mixed $var
   * @param int   $filter
   * @param mixed $option
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    if (func_num_args() >= 3) {
      $value = filter_var($var, $filter, $option);
    } else {
      $value = filter_var($var, $filter);
    }

    return self::filter($value);
  }
2006
2007
  /**
   * "filter_var_array()"-wrapper that passes the result through self::filter().
   *
   * $definition and $add_empty are only forwarded to the native function when
   * the caller supplied at least two arguments.
   *
   * @param array $data
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    if (func_num_args() >= 2) {
      $values = filter_var_array($data, $definition, $add_empty);
    } else {
      $values = filter_var_array($data);
    }

    return self::filter($values);
  }
2026
2027
  /**
   * Checks if the number of Unicode characters in a string is not
   * more than the specified integer.
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The size in number of chars to be checked against string.
   *
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2040
2041
  /**
   * Fixes a broken UTF-8 string by replacing known broken byte sequences.
   *
   * The search / replace lists are taken from self::$brokenUtf8ToUtf8 and are
   * built only once per request (kept in static locals).
   *
   * @param string $str
   *
   * @return string The repaired string; an empty string for empty input.
   */
  public static function fix_simple_utf8($str)
  {
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      // first call: split the map into parallel key / value lists
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2066
2067
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively, keys are kept).
   * A string is decoded and re-encoded repeatedly until the result no longer
   * changes.
   *
   * @param array|string $str
   *
   * @return array|string The fixed string, or an array of fixed values when an array was given.
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      return array_map(array(__CLASS__, 'fix_utf8'), $str);
    }

    $previous = '';

    // repeat until one decode / encode round leaves the value unchanged
    while ($previous != $str) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($str));
    }

    return $str;
  }
2094
2095
  /**
   * Get the text direction of a specific character.
   *
   * The character is converted to its code point via chr_to_decimal() and
   * looked up in the tables of right-to-left code points below.
   *
   * @param   string $chr Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($chr)
  {
    // isolated right-to-left code points (matched with strict equality)
    static $rtlPoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // inclusive [first, last] right-to-left code point ranges
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    $c = static::chr_to_decimal($chr);

    // cheap early exit: nothing outside this overall span is in the tables
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (in_array($c, $rtlPoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2194 2
2195
  /**
   * Load and unserialize a data file from "/data/*.ser" (relative to the
   * directory of this source file).
   *
   * @param string $file File name without directory and without ".ser".
   *
   * @return bool|string|array|int The unserialized content, false on error.
   */
  protected static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.ser';

    if (!file_exists($path)) {
      return false;
    }

    return unserialize(file_get_contents($path));
  }
2211
2212
  /**
   * Creates a random string of UTF-8 characters.
   *
   * The characters are picked with mt_rand(), so the result is not suitable
   * for anything security-related.
   *
   * @param    int $len The length of string in characters. Non-integer
   *                    values are truncated to an integer.
   *
   * @return   string String consisting of random characters; '' if $len <= 0.
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // Truncate first: the countdown loop below only stops on exactly zero,
    // which a fractional length (e.g. 2.5) would never reach.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    // the alphabet is built once and cached in the static variables
    if ($chars_len === null) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // blank out everything that is neither a number nor a letter ...
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // ... and drop the blanked entries; "strlen" is used as the filter so
        // that the digit "0" (falsy in PHP) stays in the alphabet
        $chars = array_values(array_filter($chars, 'strlen'));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for (; $len > 0; --$len) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
2259
2260
  /**
   * Converts hexadecimal U+xxxx code point representation to Integer.
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with
   * "U+" or "\u" (case-insensitive).
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // only real hex digits are allowed; with [a-z] an input such as "ag12"
    // matched and intval() silently converted just the leading "a"
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2277
2278 3
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * e.g.: &#123;&#39;&#1740;
   *
   * The string is cut into pieces with self::split() and every piece is
   * passed through single_chr_html_encode().
   *
   * @param  string $str The Unicode string to be encoded as numbered entities.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str)
  {
    $encoder = array(
        '\\voku\\helper\\UTF8',
        'single_chr_html_encode',
    );

    $entities = array_map($encoder, self::split($str));

    return implode($entities);
  }
2299 1
2300 1
  /**
2301 1
   * UTF-8 version of html_entity_decode()
2302 1
   *
2303 1
   * The reason we are not using html_entity_decode() by itself is because
2304 1
   * while it is not technically correct to leave out the semicolon
2305 1
   * at the end of an entity most browsers will still interpret the entity
2306 1
   * correctly. html_entity_decode() does not convert entities without
2307 1
   * semicolons, so we are left with our own little solution here. Bummer.
2308
   *
2309 1
   * Convert all HTML entities to their applicable characters
2310 1
   *
2311 1
   * @link http://php.net/manual/en/function.html-entity-decode.php
2312 1
   *
2313 1
   * @param string $str      <p>
2314 1
   *                         The input string.
2315 1
   *                         </p>
2316 1
   * @param int    $flags    [optional] <p>
2317 1
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2318 1
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2319 1
   *                         <table>
2320 1
   *                         Available <i>flags</i> constants
2321 1
   *                         <tr valign="top">
2322 1
   *                         <td>Constant Name</td>
2323
   *                         <td>Description</td>
2324 1
   *                         </tr>
2325 1
   *                         <tr valign="top">
2326 1
   *                         <td><b>ENT_COMPAT</b></td>
2327
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2328 1
   *                         </tr>
2329
   *                         <tr valign="top">
2330
   *                         <td><b>ENT_QUOTES</b></td>
2331
   *                         <td>Will convert both double and single quotes.</td>
2332 1
   *                         </tr>
2333
   *                         <tr valign="top">
2334 1
   *                         <td><b>ENT_NOQUOTES</b></td>
2335
   *                         <td>Will leave both double and single quotes unconverted.</td>
2336
   *                         </tr>
2337
   *                         <tr valign="top">
2338
   *                         <td><b>ENT_HTML401</b></td>
2339
   *                         <td>
2340
   *                         Handle code as HTML 4.01.
2341
   *                         </td>
2342
   *                         </tr>
2343
   *                         <tr valign="top">
2344
   *                         <td><b>ENT_XML1</b></td>
2345 2
   *                         <td>
2346
   *                         Handle code as XML 1.
2347 2
   *                         </td>
2348
   *                         </tr>
2349 2
   *                         <tr valign="top">
2350
   *                         <td><b>ENT_XHTML</b></td>
2351 2
   *                         <td>
2352
   *                         Handle code as XHTML.
2353
   *                         </td>
2354
   *                         </tr>
2355
   *                         <tr valign="top">
2356
   *                         <td><b>ENT_HTML5</b></td>
2357
   *                         <td>
2358
   *                         Handle code as HTML 5.
2359
   *                         </td>
2360
   *                         </tr>
2361 1
   *                         </table>
2362
   *                         </p>
2363 1
   * @param string $encoding [optional] <p>
2364 1
   *                         Encoding to use.
2365
   *                         </p>
2366 1
   *
2367 1
   * @return string the decoded string.
2368 1
   */
2369 1
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to do for empty input or input without any "&"
    if ($str === '') {
      return '';
    }

    if (strpos($str, '&') === false) {
      return $str;
    }

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    $entityCallback = array('\voku\helper\UTF8', 'entityCallback');

    // keep decoding until a full pass leaves the string unchanged
    do {
      $previous = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", $entityCallback, $str);

      // append the missing ";" to numeric entities so that
      // html_entity_decode() recognizes them
      $terminated = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      // decode numeric & UTF16 two byte entities
      $str = html_entity_decode($terminated, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2405
2406 1
  /**
   * Callback for preg_replace_callback(): converts the matched HTML entity
   * to UTF-8 via mb_convert_encoding().
   *
   * A decoded single quote is returned as the entity "&#x27;" instead of
   * the raw character.
   *
   * @param  array $matches PREG matches; only index 0 (the full match) is used
   *
   * @return string
   */
  protected static function entityCallback($matches)
  {
    self::checkForSupport();

    $decoded = mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    return ($decoded === "'") ? '&#x27;' : $decoded;
  }
2425 5
2426
  /**
2427
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2428 5
   *
2429
   * @link http://php.net/manual/en/function.htmlentities.php
2430
   *
2431
   * @param string $str           <p>
2432
   *                              The input string.
2433
   *                              </p>
2434
   * @param int    $flags         [optional] <p>
2435 5
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2436
   *                              invalid code unit sequences and the used document type. The default is
2437 5
   *                              ENT_COMPAT | ENT_HTML401.
2438
   *                              <table>
2439
   *                              Available <i>flags</i> constants
2440
   *                              <tr valign="top">
2441
   *                              <td>Constant Name</td>
2442
   *                              <td>Description</td>
2443
   *                              </tr>
2444
   *                              <tr valign="top">
2445
   *                              <td><b>ENT_COMPAT</b></td>
2446
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2447
   *                              </tr>
2448
   *                              <tr valign="top">
2449
   *                              <td><b>ENT_QUOTES</b></td>
2450
   *                              <td>Will convert both double and single quotes.</td>
2451
   *                              </tr>
2452
   *                              <tr valign="top">
2453
   *                              <td><b>ENT_NOQUOTES</b></td>
2454
   *                              <td>Will leave both double and single quotes unconverted.</td>
2455
   *                              </tr>
2456
   *                              <tr valign="top">
2457
   *                              <td><b>ENT_IGNORE</b></td>
2458
   *                              <td>
2459
   *                              Silently discard invalid code unit sequences instead of returning
2460
   *                              an empty string. Using this flag is discouraged as it
2461
   *                              may have security implications.
2462
   *                              </td>
2463
   *                              </tr>
2464
   *                              <tr valign="top">
2465
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2466
   *                              <td>
2467
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2468
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2469
   *                              </td>
2470
   *                              </tr>
2471
   *                              <tr valign="top">
2472
   *                              <td><b>ENT_DISALLOWED</b></td>
2473
   *                              <td>
2474
   *                              Replace invalid code points for the given document type with a
2475
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2476
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2477
   *                              instance, to ensure the well-formedness of XML documents with
2478
   *                              embedded external content.
2479
   *                              </td>
2480
   *                              </tr>
2481
   *                              <tr valign="top">
2482
   *                              <td><b>ENT_HTML401</b></td>
2483
   *                              <td>
2484
   *                              Handle code as HTML 4.01.
2485
   *                              </td>
2486
   *                              </tr>
2487
   *                              <tr valign="top">
2488 1
   *                              <td><b>ENT_XML1</b></td>
2489
   *                              <td>
2490 1
   *                              Handle code as XML 1.
2491
   *                              </td>
2492 1
   *                              </tr>
2493
   *                              <tr valign="top">
2494
   *                              <td><b>ENT_XHTML</b></td>
2495
   *                              <td>
2496
   *                              Handle code as XHTML.
2497
   *                              </td>
2498
   *                              </tr>
2499
   *                              <tr valign="top">
2500
   *                              <td><b>ENT_HTML5</b></td>
2501
   *                              <td>
2502
   *                              Handle code as HTML 5.
2503
   *                              </td>
2504
   *                              </tr>
2505
   *                              </table>
2506
   *                              </p>
2507
   * @param string $encoding      [optional] <p>
2508
   *                              Like <b>htmlspecialchars</b>,
2509
   *                              <b>htmlentities</b> takes an optional third argument
2510
   *                              <i>encoding</i> which defines encoding used in
2511
   *                              conversion.
2512
   *                              Although this argument is technically optional, you are highly
2513
   *                              encouraged to specify the correct value for your code.
2514
   *                              </p>
2515
   * @param bool   $double_encode [optional] <p>
2516
   *                              When <i>double_encode</i> is turned off PHP will not
2517
   *                              encode existing html entities. The default is to convert everything.
2518
   *                              </p>
2519
   *
2520
   *
2521
   * @return string the encoded string.
2522
   * </p>
2523 1
   * <p>
2524
   * If the input <i>string</i> contains an invalid code unit
2525 1
   * sequence within the given <i>encoding</i> an empty string
2526
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2527 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
2528
   */
2529
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // plain delegation to the native function; this wrapper only changes
    // the default encoding to UTF-8
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
2533
2534
  /**
2535
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
2536
   *
2537
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2538
   *
2539 1
   * @param string $str           <p>
2540
   *                              The string being converted.
2541 1
   *                              </p>
2542 1
   * @param int    $flags         [optional] <p>
2543 1
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2544 1
   *                              invalid code unit sequences and the used document type. The default is
2545
   *                              ENT_COMPAT | ENT_HTML401.
2546
   *                              <table>
2547 1
   *                              Available <i>flags</i> constants
2548
   *                              <tr valign="top">
2549
   *                              <td>Constant Name</td>
2550
   *                              <td>Description</td>
2551
   *                              </tr>
2552
   *                              <tr valign="top">
2553
   *                              <td><b>ENT_COMPAT</b></td>
2554
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2555
   *                              </tr>
2556
   *                              <tr valign="top">
2557
   *                              <td><b>ENT_QUOTES</b></td>
2558
   *                              <td>Will convert both double and single quotes.</td>
2559 6
   *                              </tr>
2560
   *                              <tr valign="top">
2561 6
   *                              <td><b>ENT_NOQUOTES</b></td>
2562 6
   *                              <td>Will leave both double and single quotes unconverted.</td>
2563 1
   *                              </tr>
2564
   *                              <tr valign="top">
2565
   *                              <td><b>ENT_IGNORE</b></td>
2566 1
   *                              <td>
2567 1
   *                              Silently discard invalid code unit sequences instead of returning
2568 6
   *                              an empty string. Using this flag is discouraged as it
2569 1
   *                              may have security implications.
2570 1
   *                              </td>
2571 1
   *                              </tr>
2572 1
   *                              <tr valign="top">
2573 6
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2574 6
   *                              <td>
2575
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2576
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2577
   *                              </td>
2578 6
   *                              </tr>
2579 6
   *                              <tr valign="top">
2580 1
   *                              <td><b>ENT_DISALLOWED</b></td>
2581 1
   *                              <td>
2582 6
   *                              Replace invalid code points for the given document type with a
2583
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2584 6
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2585 4
   *                              instance, to ensure the well-formedness of XML documents with
2586 4
   *                              embedded external content.
2587 4
   *                              </td>
2588
   *                              </tr>
2589
   *                              <tr valign="top">
2590
   *                              <td><b>ENT_HTML401</b></td>
2591 6
   *                              <td>
2592
   *                              Handle code as HTML 4.01.
2593
   *                              </td>
2594
   *                              </tr>
2595
   *                              <tr valign="top">
2596 6
   *                              <td><b>ENT_XML1</b></td>
2597 6
   *                              <td>
2598 6
   *                              Handle code as XML 1.
2599
   *                              </td>
2600 6
   *                              </tr>
2601
   *                              <tr valign="top">
2602
   *                              <td><b>ENT_XHTML</b></td>
2603
   *                              <td>
2604
   *                              Handle code as XHTML.
2605
   *                              </td>
2606
   *                              </tr>
2607
   *                              <tr valign="top">
2608
   *                              <td><b>ENT_HTML5</b></td>
2609
   *                              <td>
2610
   *                              Handle code as HTML 5.
2611
   *                              </td>
2612
   *                              </tr>
2613
   *                              </table>
2614 11
   *                              </p>
2615
   * @param string $encoding      [optional] <p>
2616 11
   *                              Defines encoding used in conversion.
2617
   *                              </p>
2618 11
   *                              <p>
2619 11
   *                              For the purposes of this function, the encodings
2620
   *                              ISO-8859-1, ISO-8859-15,
2621
   *                              UTF-8, cp866,
2622 1
   *                              cp1251, cp1252, and
2623 1
   *                              KOI8-R are effectively equivalent, provided the
2624
   *                              <i>string</i> itself is valid for the encoding, as
2625
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2626
   *                              the same positions in all of these encodings.
2627
   *                              </p>
2628
   * @param bool   $double_encode [optional] <p>
2629
   *                              When <i>double_encode</i> is turned off PHP will not
2630
   *                              encode existing html entities, the default is to convert everything.
2631
   *                              </p>
2632
   *
2633
   * @return string The converted string.
2634
   * </p>
2635
   * <p>
2636 11
   * If the input <i>string</i> contains an invalid code unit
2637
   * sequence within the given <i>encoding</i> an empty string
2638 11
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2639 11
   * <b>ENT_SUBSTITUTE</b> flags are set.
2640
   */
2641 11
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // plain delegation to the native function; this wrapper only changes
    // the default encoding to UTF-8
    $converted = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
2645 11
2646 11
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function iconv_loaded()
  {
    return (bool)extension_loaded('iconv');
  }
2655 11
2656
  /**
   * Converts Integer to hexadecimal U+xxxx code point representation.
   *
   * The hexadecimal part is left-padded with zeros to at least 4 digits.
   *
   * @param    int    $int  The integer to be converted to hexadecimal code point.
   * @param    string $pfix Prefix put in front of the hex digits.
   *
   * @return   string The code point, or empty string on failure.
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // anything that is not a plain sequence of decimal digits is a failure
    if (!ctype_digit((string)$int)) {
      return '';
    }

    $digits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2676
2677 2
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intl_loaded()
  {
    return (bool)extension_loaded('intl');
  }
2686 1
2687 2
  /**
   * Camel-case alias: forwards to UTF8::is_ascii() and returns its result.
   *
   * @param string $str
   *
   * @return bool
   */
  public static function isAscii($str)
  {
    return self::is_ascii($str);
  }
2698
2699
  /**
   * Camel-case alias: forwards to UTF8::is_base64() and returns its result.
   *
   * @param string $str
   *
   * @return bool
   */
  public static function isBase64($str)
  {
    return self::is_base64($str);
  }
2710
2711
  /**
   * Camel-case alias: forwards to UTF8::is_bom() and returns its result.
   *
   * @param string $utf8_chr
   *
   * @return bool
   */
  public static function isBom($utf8_chr)
  {
    return self::is_bom($utf8_chr);
  }
2722 6
2723
  /**
   * Try to check if a string is a json-string.
   *
   * Only input that json_decode() turns into an object (without reporting
   * an error) counts; the empty string never does.
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated
   */
  public static function isJson($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = json_decode($str);

    return is_object($decoded) && json_last_error() == JSON_ERROR_NONE;
  }
2750
2751
  /**
   * Camel-case alias: forwards to UTF8::is_utf8() and returns its result.
   *
   * @param string $str
   *
   * @return bool
   */
  public static function isUtf8($str)
  {
    return self::is_utf8($str);
  }
2762 5
2763
  /**
   * Checks if a string is 7 bit ASCII, i.e. contains no byte in the
   * range 0x80-0xFF.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if it is ASCII<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    $hasHighByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighByte;
  }
2775
2776
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The check is a round trip: the input must strictly decode and encoding
   * the decoded data again must reproduce the input exactly.
   *
   * @param string $str
   *
   * @return bool Whether or not $str is base64 encoded
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
2797
2798
  /**
   * Check if the input is binary... (is look like a hack)
   *
   * The input is reported as binary when it either consists solely of the
   * characters "0" and "1", or contains at least one NUL byte.
   *
   * NOTE(review): the previous implementation had a third clause,
   * substr_count($input, '^ -~') / strlen($input) > 0.3. It counted the
   * literal 4-byte text "^ -~" (not a character class), whose share of the
   * string can never exceed 25%, so the clause was always false and has
   * been dropped without changing any result. If a "share of non-printable
   * bytes" heuristic was intended, it has to be introduced deliberately,
   * because it would reclassify multi-byte text.
   *
   * @param string $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    // a string made of binary digits only (a trailing newline is tolerated by "$")
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // any NUL byte marks the input as binary
    return substr_count($input, "\x00") > 0;
  }
2822
2823
  /**
   * Check if the file is binary.
   *
   * Only the first 512 bytes of the file are read and passed to is_binary().
   * A file that cannot be opened or read is treated like an empty one.
   *
   * @param string $file Path of the file to inspect.
   *
   * @return bool
   */
  public static function is_binary_file($file)
  {
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false (it does not throw), so
      // the handle must be checked before it is passed to fread()/fclose()
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        fclose($fp);

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // only reached when an error handler converts warnings into exceptions
      $block = '';
    }

    return self::is_binary($block);
  }
2842
2843
  /**
   * Checks if the given string is exactly "UTF8 - Byte Order Mark", i.e.
   * identical to the value returned by self::bom().
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param    string $utf8_chr The input string.
   *
   * @return   bool True if the $utf8_chr is Byte Order Mark, False otherwise.
   */
  public static function is_bom($utf8_chr)
  {
    $bom = self::bom();

    return $bom === $utf8_chr;
  }
2856
2857
  /**
   * Check if the string is UTF-16.
   *
   * Only input that is_binary() accepts is examined. The input is scored
   * once as UTF-16LE and once as UTF-16BE (see utf16RoundTripScore()); the
   * byte order with the higher score wins, a tie means "not UTF-16".
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    $maybeUTF16LE = self::utf16RoundTripScore($str, 'UTF-16LE');
    $maybeUTF16BE = self::utf16RoundTripScore($str, 'UTF-16BE');

    if ($maybeUTF16LE === $maybeUTF16BE) {
      return false;
    }

    return ($maybeUTF16LE > $maybeUTF16BE) ? 1 : 2;
  }

  /**
   * Score how plausible it is that $str is encoded in the given UTF-16
   * byte order.
   *
   * The input is converted to UTF-8, back to $encoding and to UTF-8 again;
   * the score stays 0 unless the first conversion yields more than one byte
   * and both UTF-8 results are identical.
   *
   * @param string $str      The raw input.
   * @param string $encoding 'UTF-16LE' or 'UTF-16BE'.
   *
   * @return int
   */
  protected static function utf16RoundTripScore($str, $encoding)
  {
    $test = mb_convert_encoding($str, 'UTF-8', $encoding);
    if ($test === false || strlen($test) <= 1) {
      return 0;
    }

    $test2 = mb_convert_encoding($test, $encoding, 'UTF-8');
    $test3 = mb_convert_encoding($test2, 'UTF-8', $encoding);
    if ($test3 !== $test) {
      return 0;
    }

    $score = 0;
    $strChars = self::count_chars($str);

    // NOTE(review): in_array() searches the *values* of $strChars for the
    // *keys* of count_chars($test3) -- kept exactly as before; confirm
    // against the return shape of count_chars().
    foreach (self::count_chars($test3) as $test3char => $test3charEmpty) {
      if (in_array($test3char, $strChars, true) === true) {
        $score++;
      }
    }

    return $score;
  }
2911
2912
  /**
   * Check if the string is UTF-32.
   *
   * Heuristic: for each byte order the input is converted to UTF-8 and back
   * again; if that round trip is stable, a score is built from the characters
   * of the converted text. The byte order with the higher score wins; equal
   * scores mean "not UTF-32".
   *
   * @param string $str
   *
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    $scores = array();
    $strChars = null;

    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $scores[$encoding] = 0;

      $test = mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($test === false || strlen($test) <= 1) {
        continue;
      }

      $test2 = mb_convert_encoding($test, $encoding, 'UTF-8');
      $test3 = mb_convert_encoding($test2, 'UTF-8', $encoding);

      // Strict comparison: "==" would compare numeric-looking strings
      // (e.g. "1e3" and "1000") by their numeric value.
      if ($test3 !== $test) {
        continue;
      }

      // The statistics of the input are the same for both byte orders,
      // so they are computed at most once.
      if ($strChars === null) {
        $strChars = self::count_chars($str);
      }

      foreach (self::count_chars($test3) as $test3char => $test3charEmpty) {
        if (in_array($test3char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    if ($scores['UTF-32LE'] > $scores['UTF-32BE']) {
      return 1;
    }

    if ($scores['UTF-32BE'] > $scores['UTF-32LE']) {
      return 2;
    }

    return false;
  }
2966 1
2967
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * The string is walked octet by octet with a small state machine. It is
   * rejected on: an illegal lead octet, a missing continuation octet, a
   * non-shortest form, a 5 or 6 octet sequence, a surrogate (U+D800..U+DFFF),
   * a code point above U+10FFFF, or a multi-octet sequence that is cut off at
   * the end of the string.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param    string $str The string to be checked.
   *
   * @return   bool True for an empty string or valid UTF-8, false otherwise.
   */
  public static function is_utf8($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    // NOTE: there is deliberately no PCRE ("/u") shortcut here. The former
    // shortcut was only taken when PCRE had *no* UTF-8 support, which is the
    // one situation in which a "/u" pattern cannot work.

    $mState = 0; // number of continuation octets still expected
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: US-ASCII or the lead octet of a sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence: always illegal, but instead of
          // resynchronizing we read on and let the check at the end of the
          // sequence reject it.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a sequence: only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $mUcs4 |= ($in & 0x3F) << (($mState - 1) * 6);

      if (0 !== --$mState) {
        continue;
      }

      // End of the multi-octet sequence: $mUcs4 holds the final code point.
      if (
          // From Unicode 3.1, non-shortest form is illegal.
          ((2 === $mBytes) && ($mUcs4 < 0x0080)) ||
          ((3 === $mBytes) && ($mUcs4 < 0x0800)) ||
          ((4 === $mBytes) && ($mUcs4 < 0x10000)) ||
          (4 < $mBytes) ||
          // From Unicode 3.2, surrogate characters are illegal.
          (($mUcs4 >= 0xD800) && ($mUcs4 <= 0xDFFF)) ||
          // Code points outside the Unicode range are illegal.
          ($mUcs4 > 0x10FFFF)
      ) {
        return false;
      }

      // Reset for the next character ($mState is already 0 here).
      $mUcs4 = 0;
      $mBytes = 1;
    }

    // A non-zero state means the string ended in the middle of a sequence.
    return $mState === 0;
  }
3100
3101
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Decodes a JSON string after passing it through self::filter().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded.
   *                        </p>
   *                        <p>
   *                        The native decoder only works with UTF-8 encoded strings.
   *                        </p>
   *                        <p>PHP implements a superset of
   *                        JSON - scalar types and <b>NULL</b> are also encoded and decoded, although
   *                        the JSON standard only allows them nested inside an array or an object.
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects are converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, e.g.
   *                        <b>JSON_BIGINT_AS_STRING</b>
   *                        (default is to cast large integers as floats).
   *                        Ignored when Bootup::is_php('5.4') does not report true.
   *                        </p>
   *
   * @return mixed the value encoded in <i>json</i> in appropriate
   * PHP type. Values true, false and
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
   * <i>json</i> cannot be decoded or if the encoded
   * data is deeper than the recursion limit.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    // The native "$options" argument is only passed on PHP 5.4 and newer.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3149
3150
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value after passing it through self::filter().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource.
   *                       </p>
   *                       <p>
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   *                       <p>PHP implements a superset of
   *                       JSON - scalar types and <b>NULL</b> are also encoded and decoded, although
   *                       the JSON standard only allows them nested inside an array or an object.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
   *                       constants is described on
   *                       the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Ignored when Bootup::is_php('5.5') is not truthy.
   *                       </p>
   *
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // The native "$depth" argument is only passed on PHP 5.5 and newer.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3198 4
3199
  /**
   * Lower-cases the first character of a string and leaves the rest untouched.
   *
   * @param    string $str The input string
   *
   * @return   string The input with its first character passed through self::strtolower()
   */
  public static function lcfirst($str)
  {
    $firstChar = self::strtolower(self::substr($str, 0, 1));
    $remainder = self::substr($str, 1);

    return $firstChar . $remainder;
  }
3210
3211 3
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than "ltrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when left at its
   *                         default (INF), the regex class "\s" is used
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if (INF === $chars) {
      $charClass = '\s';
    } else {
      $charClass = self::rxClass($chars);
    }

    return preg_replace('/^' . $charClass . '+/u', '', $str);
  }
3233
3234
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is joined into one string before its code points are compared.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point.
   */
  public static function max($arg)
  {
    $data = is_array($arg) ? implode($arg) : $arg;
    $codePoints = self::codepoints($data);

    return self::chr(max($codePoints));
  }
3249 2
3250
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest value in self::chr_size_list($str), or 0 if that list is empty.
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    if (count($sizes) === 0) {
      return 0;
    }

    return (int)max($sizes);
  }
3267 1
3268 1
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    mb_internal_encoding('UTF-8');

    return true;
  }
3283
3284
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array argument is joined into one string before its code points are compared.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the lowest code point.
   */
  public static function min($arg)
  {
    $data = is_array($arg) ? implode($arg) : $arg;
    $codePoints = self::codepoints($data);

    return self::chr(min($codePoints));
  }
3299
3300
  /**
   * Normalize the encoding-name input.
   *
   * The label is upper-cased and stripped of everything except ASCII letters,
   * digits and whitespace before it is looked up in a fixed alias table.
   * Labels without an entry are returned unchanged.
   *
   * @param string $encodingLabel e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string e.g.: ISO-8859-1, UTF-8, ISO-8859-5 etc.
   */
  public static function normalizeEncoding($encodingLabel)
  {
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'WINDOWS1251' => 'ISO-8859-5',
    );

    return isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : $encodingLabel;
  }
3335
3336
  /**
   * Normalize MS Word special characters.
   *
   * Every key of self::$utf8MSWord found in the string is replaced by its
   * mapped value. The search and replacement lists are built on the first
   * call and reused afterwards.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replace, $str);
  }
3355
3356
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable found in the string is replaced by
   * a plain space; unless told otherwise, the entries of
   * self::$bidiUniCodeControlsTable are removed first.
   *
   * @param string $str                       The string to be normalized.
   * @param bool   $keepNonBreakingSpace      Set to true (strictly), to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls   Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // The cache slot must be derived from the same strict test that decides
    // how the table is built. With "(int)$keepNonBreakingSpace" a truthy
    // non-boolean (e.g. 1) stored a table that still contained the NBSP in
    // the slot that later calls with "true" read from.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($whitespaces[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3394
3395
  /**
   * Format a number with grouped thousands.
   *
   * Works like the native number_format(), but separators longer than one
   * byte (e.g. multi-byte UTF-8 characters) are supported independently of
   * the PHP version.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // Format with the one-byte defaults, then swap both separators in a
      // single pass. strtr() never re-scans its own output, whereas
      // sequential str_replace() calls would also rewrite a "," that was
      // just inserted as the decimal point.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3425
3426
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * Only the first character (at most four bytes) of the input is looked at.
   * The bytes are decoded arithmetically; they are not validated.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for an empty input.
   */
  public static function ord($s)
  {
    // The string "0" is falsy in PHP but is a real character (U+0030),
    // so it must not be treated as empty input.
    if (!$s && $s !== '0') {
      return 0;
    }

    $bytes = unpack('C*', substr($s, 0, 4));
    $a = $bytes ? $bytes[1] : 0;

    // 4-byte sequence
    if (0xF0 <= $a && isset($bytes[4])) {
      return (($a - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // 3-byte sequence
    if (0xE0 <= $a && isset($bytes[3])) {
      return (($a - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // 2-byte sequence
    if (0xC0 <= $a && isset($bytes[2])) {
      return (($a - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte (or a lead byte without enough bytes following it)
    return $a;
  }
3457
3458
  /**
   * Parses the string into variables.
   *
   * WARNING: Unlike a one-argument parse_str() call, nothing is placed in
   *    the local scope; the parsed variables are written to $result.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string; it is passed through self::filter() before parsing.
   *                        </p>
   * @param array  $result  <p>
   *                        Receives the parsed variables as array elements (passed by reference).
   *                        </p>
   *
   * @return void
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    mb_parse_str(self::filter($str), $result);
  }
3485
3486
  /**
   * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * The check runs an empty pattern with the modifier and looks at the result
   * of preg_match(); warnings are silenced.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matchResult = @preg_match('//u', '');

    return (bool)$matchResult;
  }
3496 1
3497
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: a string of decimal digits is
   * taken as the number itself, a string of hexadecimal digits goes through
   * self::hex_to_int(), anything else through self::ord(). An empty array is
   * returned if a boundary is falsy or resolves to 0.
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // Resolve the start first, then the end; stop at the first unusable boundary.
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $chars = array();
    foreach (range($bounds[0], $bounds[1]) as $codePoint) {
      $chars[] = self::chr($codePoint);
    }

    return $chars;
  }
3543
3544
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * The marks are tested one after another in a fixed order, and each one
   * found at the start of the (remaining) string is cut off.
   *
   * @param string $str
   *
   * @return string The input without leading BOM; an empty string if the input was only a BOM.
   */
  public static function removeBOM($str = '')
  {
    // The order matters: the UTF-32 (LE) mark starts with the same two bytes
    // as the UTF-16 (LE) mark, so the longer one has to be tested first.
    $boms = array(
        "\x00\x00\xfe\xff", // UTF-32 (BE)
        "\xff\xfe\x00\x00", // UTF-32 (LE)
        "\xef\xbb\xbf",     // UTF-8
        "\xfe\xff",         // UTF-16 (BE)
        "\xff\xfe",         // UTF-16 (LE)
    );

    foreach ($boms as $bom) {
      $bomLength = strlen($bom);

      /** @noinspection SubStrUsedAsStrPosInspection */
      if (substr($str, 0, $bomLength) === $bom) {
        // Before PHP 8, substr() returns "false" when nothing is left after
        // the BOM; the cast keeps the return type a string.
        $str = (string)substr($str, $bomLength);
      }
    }

    return $str;
  }
3591
3592
  /**
   * Collapses consecutive repetitions of a string inside another string.
   *
   * e.g.: remove_duplicates('a    b', ' ') => 'a b'
   *
   * @param    string       $str  The base string
   * @param    string|array $what String (or list of strings) whose consecutive repetitions are collapsed
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        $item = (string)$item;

        // an empty needle can not be repeated, nothing to collapse
        if ($item === '') {
          continue;
        }

        // A callback inserts the needle literally. A plain replacement string
        // would interpret sequences like "$0" or "\1" as back-references.
        $str = preg_replace_callback(
            '/(?:' . preg_quote($item, '/') . ')+/',
            function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
3614
3615
  /**
   * Remove invisible characters.
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the url-encoded form ("%00", "%1F", ...) of the characters.
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoded hex digits may be upper- or lower-case, hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is removed anymore: a removal can glue the
    // surrounding bytes together into a new forbidden sequence
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3648 1
3649
  /**
   * Replace the Unicode replacement character (the "diamond question mark")
   * with another string.
   *
   * @param string $str     The input string.
   * @param string $unknown The string that is inserted for every replacement character.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the replacement character, once as escaped bytes and once as literal
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );

    $substitutes = array_fill(0, count($needles), $unknown);

    return str_replace($needles, $substitutes, $str);
  }
3671
3672
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * WARNING: This is much slower than "rtrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped (default: whitespace)
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if (INF === $chars) {
      $pattern = '\s';
    } else {
      // nothing to strip; an empty list would also build the invalid class "[]"
      if ('' === (string)$chars) {
        return $str;
      }

      $pattern = self::rxClass($chars);
    }

    // "D" (PCRE_DOLLAR_ENDONLY): without it "$" also matches right before a
    // final newline, so characters in front of that newline would be stripped.
    return preg_replace("/{$pattern}+$/uD", '', $str);
  }
3694
3695
  /**
   * Build a regular-expression fragment that matches any one of the given
   * characters (for use inside a "/"-delimited pattern).
   *
   * @param string $s     The characters that should be matched.
   * @param string $class Raw character-class content that is always included, e.g. '\pL'.
   *
   * @return string A bracket expression like "[abc]", or a non-capturing
   *                alternation "(?:[abc]|...)" if elements made of more than
   *                one code point are involved.
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    // The length prefix keeps the key unambiguous: plain concatenation would
    // map e.g. ('\pL', '') and ('', '\pL') to the same cache entry.
    $cacheKey = strlen($s) . ':' . $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    $class = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a leading "-" is taken literally inside a bracket expression
        $class[0] = '-' . $class[0];
      } elseif (!isset($char[2])) {
        // at most two bytes: may be a regex meta character, quote it
        $class[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // one single multi-byte character
        $class[0] .= $char;
      } else {
        // more than one code point: can not live inside "[...]",
        // add it (quoted) as an alternative of its own
        $class[] = preg_quote($char, '/');
      }
    }

    $class[0] = '[' . $class[0] . ']';

    if (1 === count($class)) {
      $return = $class[0];
    } else {
      $return = '(?:' . implode('|', $class) . ')';
    }

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
3739
3740 1
  /**
   * Print every entry of the internal support list, one per line
   * (each followed by "\n<br>"), e.g. for debugging.
   */
  public static function showSupport()
  {
    $lineEnd = "\n<br>";

    foreach (self::$support as $supportEntry) {
      echo $supportEntry, $lineEnd;
    }
  }
3749 1
3750
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   *
   * @return   string The HTML numbered entity, or an empty string for empty input.
   */
  public static function single_chr_html_encode($chr)
  {
    $chr = (string)$chr;

    // check for "no character" explicitly: a loose "!$chr" would also
    // reject the perfectly valid character "0"
    if (!isset($chr[0])) {
      return '';
    }

    return '&#' . self::ord($chr) . ';';
  }
3765
3766
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *                              (only applied when PCRE UTF-8 support is used).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    self::checkForSupport();
    $ret = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }

    } else {

      // fallback: walk over the bytes and collect well-formed sequences only
      $byteCount = strlen($str);

      for ($i = 0; $i < $byteCount; $i++) {
        $lead = $str[$i];

        // single byte (ASCII)
        if (($lead & "\x80") === "\x00") {
          $ret[] = $lead;
          continue;
        }

        // number of continuation bytes announced by the lead byte
        if (($lead & "\xE0") === "\xC0") {
          $extra = 1;
        } elseif (($lead & "\xF0") === "\xE0") {
          $extra = 2;
        } elseif (($lead & "\xF8") === "\xF0") {
          $extra = 3;
        } else {
          // not a valid lead byte: skip it
          continue;
        }

        // truncated sequence at the end of the string: skip the lead byte
        if (!isset($str[$i + $extra])) {
          continue;
        }

        $char = $lead;
        $wellFormed = true;
        for ($k = 1; $k <= $extra; $k++) {
          $byte = $str[$i + $k];
          if (($byte & "\xC0") !== "\x80") {
            $wellFormed = false;
            break;
          }
          $char .= $byte;
        }

        if ($wellFormed) {
          $ret[] = $char;
          $i += $extra;
        }
      }
    }

    if ($length > 1) {
      $ret = array_map('implode', array_chunk($ret, $length));
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
3844
3845
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * A byte order mark at the start of the string wins; otherwise
   * "mb_detect_encoding()" and the internal UTF-16 / UTF-32 checks are used.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    // Byte order marks. The 4-byte UTF-32 marks have to be tested first,
    // because the UTF-32 (LE) mark starts with the bytes of the UTF-16 (LE) mark.
    $byteOrderMarks = array(
        "\x00\x00\xfe\xff" => 'UTF-32BE',
        "\xff\xfe\x00\x00" => 'UTF-32LE',
        "\xef\xbb\xbf"     => 'UTF-8',
        "\xfe\xff"         => 'UTF-16BE',
        "\xff\xfe"         => 'UTF-16LE',
    );

    foreach ($byteOrderMarks as $bom => $bomEncoding) {
      if (strncmp($str, $bom, strlen($bom)) === 0) {
        return $bomEncoding;
      }
    }

    self::checkForSupport();

    // For UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always.
    $detectOrder = array(
        'UTF-8',
        'windows-1251',
        'windows-1252',
        'ISO-8859-1',
    );
    $encoding = mb_detect_encoding($str, $detectOrder, true);

    if (self::is_binary($str)) {
      $utf16 = self::is_utf16($str);
      if ($utf16 == 1) {
        return 'UTF-16LE';
      }
      if ($utf16 == 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 == 1) {
        return 'UTF-32LE';
      }
      if ($utf32 == 2) {
        return 'UTF-32BE';
      }
    }

    if (!$encoding) {
      return false;
    }

    return $encoding;
  }
3924
3925
  /**
   * Case-insensitive version of "str_replace()" for UTF-8 strings.
   *
   * @param string|array $search  The value(s) being searched for; an empty needle never matches.
   * @param string|array $replace The replacement value(s), inserted literally.
   * @param string|array $subject The string (or array of strings) being searched and replaced on.
   * @param null|int     $count   [optional] If passed, this will hold the number of replacements.
   *
   * @return string|array The subject with the replaced values.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // a pattern that can never match
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // "preg_replace()" would interpret "$n" and "\n" in the replacement as
    // back-references, so escape them to get literal text.
    $escapeMap = array(
        '\\' => '\\\\',
        '$'  => '\\$',
    );

    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $value) {
        $replacement[] = strtr((string)$value, $escapeMap);
      }
    } else {
      $replacement = strtr((string)$replace, $escapeMap);
    }

    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
3953
3954 2
  /**
   * Limit the number of characters in a string without cutting a word in half:
   * the string is cut at the last space within the limit and $strAddOn is appended.
   *
   * @param  string $str      The input string.
   * @param  int    $length   Max. number of characters taken from the input.
   * @param  string $strAddOn Appended whenever the input had to be shortened.
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // short enough, nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit hits a space: cut right there
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // cut at the limit and drop the (probably incomplete) last word
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    // no space at all within the limit: hard cut
    if ('' === $withoutLastWord) {
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
3992
3993
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string; the input is returned
   *                              unchanged if this is not a positive integer or is
   *                              smaller than the input length
   * @param    string $pad_string String to use for padding the input string; the input
   *                              is returned unchanged if this is empty
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || ($pad_length <= 0) || ($pad_length < $input_length)) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // an empty padding string can not fill anything (and would cause a division by zero below)
    if ($ps_length < 1) {
      return $input;
    }

    $diff = $pad_length - $input_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the bigger one;
        // divide first and convert to int afterwards, so that a real integer
        // is used as length
        $repeat = (int)ceil($diff / $ps_length / 2);
        $pre = str_repeat($pad_string, $repeat);
        $pre = self::substr($pre, 0, (int)floor($diff / 2));
        $post = str_repeat($pad_string, $repeat);
        $post = self::substr($post, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $input . $post;
  }
4038
4039
  /**
   * Repeat a string.
   *
   * The input is passed through the internal "filter()" method before it is repeated.
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           </p>
   *                           <p>
   *                           Has to be greater than or equal to 0; with 0 an
   *                           empty string is returned.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return str_repeat(self::filter($input), $multiplier);
  }
4063 8
4064 8
  /**
   * INFO: this is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for (the needle); an array
   *                       may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value for found search values; an array
   *                       may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on (the haystack).
   *                       If it is an array, every entry is processed and an array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4098
4099
  /**
   * Shuffles all the characters in the string.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    // shuffle whole characters, never single bytes
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
4114
4115
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops repeated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4140
4141
  /**
   * Convert a string to an array of grapheme clusters, optionally grouped
   * into chunks of $len clusters.
   *
   * @param string $str The input string.
   * @param int    $len Number of clusters per array element; values below 1 are
   *                    handed over to the native "str_split()".
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();

    $len = (int)$len;
    if ($len < 1) {
      // let the native function deal with (and complain about) the invalid length
      $len = func_get_arg(1);

      return str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $clusters = array();
      $next = 0;
      $byteLength = strlen($str);
      while ($next < $byteLength) {
        // the last argument receives the start position of the following cluster
        $clusters[] = grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $next, $next);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $clusters = $matches[0];
    }

    if (1 == $len) {
      return $clusters;
    }

    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if (0 === $position % $len) {
        // every $len-th cluster starts a new chunk
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
4190
4191
  /**
   * Get a binary representation of a string: every byte of the input
   * is rendered as a group of 8 binary digits.
   *
   * @param   string $str The input character.
   *
   * @return  string
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $bits = '';

    // native "str_split()": iterate over the single bytes
    foreach (str_split($str) as $byte) {
      $bits .= vsprintf('%08b', (array)self::ord($byte));
    }

    return $bits;
  }
4217
4218
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   *
   * The lookup tables are loaded lazily, one "bank" of 256 code points per
   * file, from the "data" directory next to this file.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // The lead byte tells how many continuation bytes follow and which
      // payload bits it carries itself.
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $extra = 1;
        $ord = $ordC0 - 192;
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $extra = 2;
        $ord = $ordC0 - 224;
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $extra = 3;
        $ord = $ordC0 - 240;
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $extra = 4;
        $ord = $ordC0 - 248;
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $extra = 5;
        $ord = $ordC0 - 252;
      } else {
        // stray continuation byte (128-191) or invalid byte (254, 255);
        // never reuse the code point of a previous character here
        $c = $unknown;
        continue;
      }

      // truncated sequence
      if (!isset($c[$extra])) {
        $c = $unknown;
        continue;
      }

      // every continuation byte contributes its lower 6 bits
      for ($i = 1; $i <= $extra; $i++) {
        $ord = $ord * 64 + (ord($c[$i]) - 128);
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the data file presumably fills $UTF8_TO_ASCII[$bank] - confirm against the data files
          /** @noinspection PhpIncludeInspection */
          include $bankfile;
        }

        // no data file, or the file did not define this bank
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference, so that later writes to $c can not alter $chars
    unset($c);

    return implode('', $chars);
  }
4328
4329
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $s        The input string.
   * @param int    $format   0: return the number of words<br />
   *                         1: return a list of the words<br />
   *                         2: return the words, keyed by their offset in the string
   * @param string $charlist Additional characters which are considered part of a word.
   *
   * @return array|float|string The number of words in the string
   */
  public static function str_word_count($s, $format = 0, $charlist = '')
  {
    $wordChars = self::rxClass($charlist, '\pL');

    // the words are the captured delimiters and therefore end up on the odd indexes
    $parts = preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $s, -1, PREG_SPLIT_DELIM_CAPTURE);
    $partCount = count($parts);

    if (1 == $format) {
      $words = array();
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if (2 == $format) {
      self::checkForSupport();

      $words = array();
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$offset] = $parts[$i];
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($partCount - 1) / 2;
  }
4363
4364
  /**
   * Case-insensitive string comparison: both strings are case-folded
   * and then compared with "strcmp()" of this class.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4376
4377
  /**
   * String comparison: unless the strings are identical, their
   * NFD-normalized forms are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    // shortcut: identical strings need no normalization
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $normalized1 = Normalizer::normalize($str1, Normalizer::NFD);
    $normalized2 = Normalizer::normalize($str2, Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4394
4395
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      The input string.
   * @param string $charlist The characters that end the initial segment.
   * @param int    $start    Start position within the string.
   * @param int    $len      Length of the part of the string that is examined.
   *
   * @return int|null The length of the segment, or null if $charlist is empty.
   */
  public static function strcspn($str, $charlist, $start = 0, $len = 2147483647)
  {
    $charlist = (string)$charlist;
    if ('' === $charlist) {
      return null;
    }

    // only cut the string if a sub-range was requested
    if ($start || 2147483647 != $len) {
      $str = (string)self::substr($str, $start, $len);
    } else {
      $str = (string)$str;
    }

    // lazily capture everything in front of the first character of the list
    if (preg_match('/^(.*?)' . self::rxClass($charlist) . '/us', $str, $matches)) {
      return self::strlen($matches[1]);
    }

    return self::strlen($str);
  }
4424
4425
  /**
   * Makes a UTF-8 string from code points.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    // "chr()" of this class converts one single code point
    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    $characters = array_map($toChar, $array);

    return implode($characters);
  }
4444
4445
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * Only the first three bytes of the string are inspected.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $leadingBytes = substr($str, 0, 3);

    return self::is_bom($leadingBytes);
  }
4456
4457
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The string is passed through "clean()" of this class first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags which should not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
4483
4484
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching
   *                           </p>
   * @param string  $encoding  The character encoding, "UTF-8" by default.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the first occurrence of needle in
   *                   the haystack, or false if needle is not found or if one
   *                   of the strings is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    // "mb_stripos()" expects an integer offset: handing over the default
    // "null" is deprecated in newer PHP versions, so convert it (null => 0)
    return mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4531
4532
  /**
   * Case-insensitive search for needle in a string, delegating to mb_stristr() with UTF-8.
   *
   * By default the part of the string starting at (and including) the first occurrence of
   * needle is returned.
   *
   * @param string $str           The string to search in.
   * @param string $needle        The string to search for.
   * @param bool   $before_needle Forwarded unchanged to mb_stristr().
   *
   * @return false|string False if needle is empty, otherwise the result of mb_stristr().
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    // force a string so that the emptiness check below is reliable
    $needle .= '';

    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    return mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
4552
4553
  /**
   * Get the string length in characters, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Charset handed to mb_strlen(); a boolean (legacy call signature)
   *                           is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied when the
   *                           encoding is 'UTF-8').
   *
   * @return int The number of characters in str for the given encoding.
   *             A multi-byte character is counted as 1.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    $shouldClean = ($encoding === 'UTF-8' && $cleanUtf8 === true);
    if ($shouldClean) {
      $str = self::clean($str);
    }

    return mb_strlen($str, $encoding);
  }
4589
4590
  /**
   * Case insensitive string comparison using a "natural order" algorithm.
   *
   * Both inputs are case-folded with strtocasefold() and then compared by strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Similar to other string comparison functions: < 0 if str1 is less than str2,
   *             > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4603
4604
  /**
   * String comparison using a "natural order" algorithm.
   *
   * Identical strings short-circuit to 0; otherwise both inputs are passed through
   * strtonatfold() before the native strnatcmp() is applied.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Similar to other string comparison functions: < 0 if str1 is less than str2,
   *             > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4617
4618
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both inputs are case-folded with strtocasefold() and then compared by strncmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len Number of characters to compare.
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4631
4632
  /**
   * Comparison of the first n characters.
   *
   * Each input is truncated to $len characters via self::substr() and the two
   * prefixes are compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len Number of characters to compare.
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    $prefix1 = self::substr($str1, 0, $len);
    $prefix2 = self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
4647
4648
  /**
   * Search a string for any of a set of characters.
   *
   * @param string $s        The string to search in.
   * @param string $charList The characters to look for (turned into a regex class by rxClass()).
   *
   * @return string|false The part of $s starting at the first matching character,
   *                      or false if the pattern does not match.
   */
  public static function strpbrk($s, $charList)
  {
    $pattern = '/' . self::rxClass($charList) . '/us';

    if (!preg_match($pattern, $s, $m)) {
      return false;
    }

    // cut the input at the byte position of the first matched character
    $firstHit = strpos($s, $m[0]);

    return substr($s, $firstHit);
  }
4664
4665
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack. A non-negative integer is
   *                              converted to a character with self::chr() before searching.
   *                              </p>
   * @param int        $offset    [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string     $encoding  Character encoding handed to mb_strpos(); a boolean (legacy
   *                              call signature) is replaced by 'UTF-8'.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or an input is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    if (!isset($haystack[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    // iconv and mbstring do not support integer $needle, so translate it into a character.
    // This has to happen *before* the string cast: once $needle is a string the integer
    // check can never succeed and the conversion would be dead code.
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): the 'iconv' support flag gates a grapheme_* call here - confirm this is intended
    if (self::$support['iconv'] === true) {
      return grapheme_strpos($haystack, $needle, $offset);
    }

    // fallback: drop the first $offset characters, search byte-wise and
    // translate the byte position back into a character position

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
4739
4740
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around mb_strrchr().
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to 'UTF-8'.
   *                         </p>
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
4774
4775
  /**
   * Reverses characters order in the string.
   *
   * The string is broken up with self::split(), the pieces are reversed and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
4786
4787
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around mb_strrichr().
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to 'UTF-8'.
   *                         </p>
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
4821
4822
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Both inputs are lower-cased with self::strtolower() and then handed to self::strrpos().
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int|false The result of self::strrpos() on the lower-cased inputs
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
4835
4836
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked, for the last occurrence
   *                              of needle.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack. A non-negative integer is
   *                              converted to a character with self::chr() before searching.
   *                              </p>
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters into
   *                              the string. Negative values will stop searching at an arbitrary point
   *                              prior to the end of the string.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found (or an input is empty),
   *                   it returns false.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    if (!isset($haystack[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // An integer needle is translated into a character. This has to happen *before* the
    // string cast: once $needle is a string the integer check can never succeed and the
    // conversion would be dead code.
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): the 'iconv' support flag gates a grapheme_* call here - confirm this is intended
    if (self::$support['iconv'] === true) {
      return grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: trim the haystack according to the offset, search byte-wise and
    // translate the byte position back into a character position

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    if (($pos = strrpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
4909
4910
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters
   * contained within a given mask.
   *
   * @param string $s     The string to examine.
   * @param string $mask  The allowed characters (turned into a regex class by rxClass()).
   * @param int    $start Start position; when non-zero the string is cut with self::substr() first.
   * @param int    $len   Length of the examined part; 2147483647 means "no limit".
   *
   * @return int Character length of the leading run, or 0 if the pattern does not match.
   */
  public static function strspn($s, $mask, $start = 0, $len = 2147483647)
  {
    $needsSlice = ($start || 2147483647 != $len);
    if ($needsSlice) {
      $s = self::substr($s, $start, $len);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $s, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
4929
4930
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Thin wrapper around grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      <p>
   *                              The input string. Must be valid UTF-8.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to look for. Must be valid UTF-8.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, grapheme_strstr() returns the part of the
   *                              haystack before the first occurrence of the needle (excluding the needle).
   *                              </p>
   *
   * @return string|false The portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
4954
4955
  /**
   * Unicode transformation for case-less matching.
   *
   * Applies the common case-fold replacements, optionally the "full" case-folding table,
   * then cleans the result and lower-cases it.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str  The input string.
   * @param bool   $full Whether to also apply the 'caseFolding_full' replacement table.
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // replacement tables are built once and cached for later calls
    static $fullTable = null;
    static $commonKeys = null;
    static $commonValues = null;

    if ($commonKeys === null) {
      $commonKeys = array_keys(self::$commonCaseFold);
      $commonValues = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonKeys, $commonValues, $str);

    if ($full) {
      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    return self::strtolower(self::clean($folded));
  }
4992
4993
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      <p>
   *                         The string being lowercased.
   *                         </p>
   * @param string $encoding Character encoding handed to mb_strtolower().
   *
   * @return string str with all alphabetic characters converted to lowercase;
   *                an empty string for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $lowered = mb_strtolower($str, $encoding);

    return $lowered;
  }
5019
5020
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The input is normalized to NFD and every run of \p{Mn} characters is removed.
   *
   * @param string $s
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = Normalizer::normalize($s, Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5031
5032
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when mbstring support is flagged; otherwise falls back to a
   * replacement based on self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding Character encoding handed to mb_strtoupper().
   *
   * @return string str with all alphabetic characters converted to uppercase;
   *                an empty string for empty input.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // lookup lists for the fallback, built once and cached for later calls
    static $tableKeys = null;
    static $tableValues = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return mb_strtoupper($str, $encoding);
    }

    // fallback

    if ($tableKeys === null) {
      $table = self::case_table();
      $tableKeys = array_keys($table);
      $tableValues = array_values($table);
    }

    $cleaned = self::clean($str);

    return str_replace($tableKeys, $tableValues, $cleaned);
  }
5075
5076
  /**
   * Translate characters or replace sub-strings.
   *
   * With three arguments, $from and $to are split via self::str_split() and paired
   * up position by position (the longer list is truncated to the length of the shorter one).
   * With two arguments, $from is handed to the native strtr() as-is, so it is expected
   * to be an array of replacement pairs in that case.
   *
   * @param string       $s    The string being translated.
   * @param string|array $from Characters to replace, or a replacement map when $to is omitted.
   * @param string       $to   Replacement characters; INF marks "not given".
   *
   * @return string
   */
  public static function strtr($s, $from, $to = INF)
  {
    if (INF === $to) {
      return strtr($s, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);

    // only as many pairs as both lists can provide
    $pairCount = min(count($fromChars), count($toChars));

    $map = array_combine(
        array_slice($fromChars, 0, $pairCount),
        array_slice($toChars, 0, $pairCount)
    );

    return strtr($s, $map);
  }
5104
5105
  /**
   * Return the width of a string, as reported by mb_strwidth() for UTF-8.
   *
   * @param string $s
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = mb_strwidth($s, 'UTF-8');

    return $width;
  }
5119
5120
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>
   *                           The string being checked.
   *                           </p>
   * @param int     $start     <p>
   *                           The first position used in str.
   *                           </p>
   * @param int     $length    [optional] <p>
   *                           The maximum length of the returned string. When null, the
   *                           character length of str is used.
   *                           </p>
   * @param string  $encoding  Character encoding handed to mb_substr(); a boolean (legacy
   *                           call signature) is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string The portion of str specified by the start and length parameters;
   *                an empty string for empty input.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr

      $str = self::clean($str);
    }

    // a missing length means "up to the end of the string"
    $length = (int)($length === null ? self::strlen($str) : $length);

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if (is_bool($encoding)) {
        $encoding = 'UTF-8';
      }

      return mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return (string)grapheme_substr($str, $start, $length);
    }

    // fallback

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    $wanted = array_slice($chars, $start, $length);

    return implode('', $wanted);
  }
5186
5187
  /**
   * Comparison of two strings from an offset, up to length characters.
   *
   * The main string is cut with self::substr($main_str, $offset, $length); the secondary
   * string is then truncated to the character length of that cut before comparing.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
   *                                    end of the string.
   * @param int     $length             The length of the comparison.
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int The result of self::strcasecmp() or self::strcmp() on the two parts.
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5207
5208
  /**
   * Count the number of sub-string occurrences.
   *
   * Offset and length are measured in characters: the haystack is first cut with
   * self::substr() and the occurrences are then counted in the remaining part.
   *
   * @param    string   $haystack The string to search in.
   * @param    string   $needle   The string to search for.
   * @param    int      $offset   The offset (in characters) where to start counting.
   * @param    int|null $length   The maximum length (in characters) after the specified offset to
   *                              search for the substring; null means "up to the end".
   *
   * @return   int number of occurrences of $needle
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $offset = (int)$offset;

    // Restrict the haystack to the requested character window exactly once.
    // The window must not be handed to the native substr_count() again: that function
    // works on bytes and would apply offset/length a second time to the already cut string.
    // A null length is passed through untouched so that self::substr() reads it as
    // "up to the end" instead of a length of 0.
    if ($offset !== 0 || $length !== null) {
      $haystack = self::substr($haystack, $offset, $length);
    }

    return substr_count($haystack, $needle);
  }
5234 1
5235
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * When $str is an array, the replacement is applied to every element and
   * $replacement, $start and $length may be arrays as well (they are cut or
   * padded to the number of elements in $str). For a plain string, the string
   * and the replacement are split into characters with a UTF-8 aware regex
   * and spliced together.
   *
   * @param string|array   $str
   * @param string|array   $replacement
   * @param int|array      $start
   * @param null|int|array $length null replaces everything from $start to the end of a plain string;
   *                               for an array $str a missing length is filled with 0 (insert only).
   *
   * @return array|string
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$startValue) {
          // non-integer values fall back to 0
          $startValue = (int)$startValue === $startValue ? $startValue : 0;
        }
        // break the reference, otherwise a later write to the variable would alter the last element
        unset($startValue);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$lengthValue) {
          if (isset($lengthValue)) {
            // non-integer values fall back to the number of subjects
            $lengthValue = (int)$lengthValue === $lengthValue ? $lengthValue : $num;
          } else {
            $lengthValue = 0;
          }
        }
        unset($lengthValue);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // a single subject only uses the first replacement
    if (is_array($replacement)) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // split both strings into characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // replace up to the end: use the number of characters we actually split,
      // this does not depend on the configured mbstring encoding
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // implode() with the separator first, the legacy argument order was removed in PHP 8
    return implode('', $smatches[0]);
  }
5313
5314
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change
   * the character (it already was upper case), it is lower-cased instead.
   *
   * @param string $str
   * @param string $encoding
   *
   * @return string each character's case swapped
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $swapSingleChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // unchanged by the upper-casing => it was upper case already
      if ($char === $upper) {
        return UTF8::strtolower($char, $encoding);
      }

      return $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapSingleChar, self::clean($str));
  }
5348
5349
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Passed through to "UTF8::to_ascii()".
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
5361
5362
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
5373
5374
  /**
   * alias for "UTF8::to_utf8"
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
5385
5386 13
  /**
   * convert to ASCII
   *
   * Strings without any byte >= 0x80 are returned as they are (after
   * "UTF8::clean()"). Otherwise the string is NFKC-normalized and every
   * multi-byte character is transliterated on its own: first via iconv, then
   * via the "translit_extra" data table, then via NFD decomposition and
   * finally via "UTF8::str_transliterate()".
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Replacement for characters without an ASCII decomposition.
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // nothing to do for pure 7-bit input
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = Normalizer::normalize($s, Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // the "s" modifier is required, without it "." skips newlines and they
    // would silently vanish from the result
    preg_match_all('/./us', $s, $matches);
    $chars = $matches[0];

    foreach ($chars as &$c) {

      // single-byte characters are ASCII already
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if ($t !== false && is_string($t)) {
          if (!isset($t[0])) {
            $t = '?';
          } elseif (isset($t[1])) {
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // a failed iconv() call must not delete the character, run the
      // fallbacks below instead
      if (!is_string($t)) {
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = Normalizer::normalize($c, Normalizer::NFD);

          // keep the base character if the decomposition starts with ASCII
          if (is_string($t) && isset($t[0]) && $t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference to the last element
    unset($c);

    return implode('', $chars);
  }
5465
5466
  /**
   * alias for "UTF8::to_win1252()"
   *
   * @param   string|array $str
   *
   * @return  array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
5477
5478
  /**
   * alias for "UTF8::to_win1252()"
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
5489
5490
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte-wise pass, "\uXXXX" escape sequences and decimal numeric
   * HTML entities with 2-4 digits are decoded as well.
   *
   * @param string|array $str Any string or array (arrays are converted element by element).
   *
   * @return string|array The same string (or array), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    // byte length, not character length
    $max = self::strlen($str, '8bit');

    $buf = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      // 7-bit byte: it doesn't need conversion
      if ($c1 < "\x80") {
        $buf .= $c1;
        continue;
      }

      if ($c1 >= "\xc0") { // possible UTF-8 lead byte
        // missing bytes at the end of the string are replaced by NUL,
        // which is never a valid continuation byte
        $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
        $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
        $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

        $c2IsContinuation = $c2 >= "\x80" && $c2 <= "\xbf";
        $c3IsContinuation = $c3 >= "\x80" && $c3 <= "\xbf";
        $c4IsContinuation = $c4 >= "\x80" && $c4 <= "\xbf";

        // looks like 2 bytes UTF8 - almost sure it's UTF8 already
        if ($c1 <= "\xdf" && $c2IsContinuation) {
          $buf .= $c1 . $c2;
          $i++;
          continue;
        }

        // looks like 3 bytes UTF8
        if ($c1 >= "\xe0" && $c1 <= "\xef" && $c2IsContinuation && $c3IsContinuation) {
          $buf .= $c1 . $c2 . $c3;
          $i += 2;
          continue;
        }

        // looks like 4 bytes UTF8
        if ($c1 >= "\xf0" && $c1 <= "\xf7" && $c2IsContinuation && $c3IsContinuation && $c4IsContinuation) {
          $buf .= $c1 . $c2 . $c3 . $c4;
          $i += 3;
          continue;
        }
      } else { // 0x80 - 0xBF: a lone continuation byte
        $ordC1 = ord($c1);

        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
          continue;
        }
      }

      // not valid UTF8 - treat the byte as ISO 8859-1 and encode it as a
      // 2 bytes UTF-8 sequence (integer shift instead of a float division,
      // chr() must not receive a fractional value)
      $ord = ord($c1);
      $buf .= chr(0xc0 | ($ord >> 6)) . chr(0x80 | ($ord & 0x3f));
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
5617
5618
  /**
   * Convert a string into win1252.
   *
   * Arrays are converted element by element (recursively), strings are passed
   * to "UTF8::utf8_decode()" and every other value is returned unchanged.
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  protected static function to_win1252($str)
  {
    if (is_string($str)) {
      return self::utf8_decode($str);
    }

    if (!is_array($str)) {
      return $str;
    }

    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      $str[$key] = self::to_win1252($value);
    }

    return $str;
  }
5641
5642
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower then "trim()"
   *
   * But we can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ACSII (7-Bit) cost more time, then we can safe here.
   *
   * Without a char list, all characters of the unicode categories "Z"
   * (separators) and "C" (control / other) are stripped.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Only a really missing char list selects the default behaviour, a plain
    // truthiness check would also swallow the valid char list "0".
    $noCharList = $chars === INF
                  || $chars === null
                  || $chars === false
                  || $chars === ''
                  || $chars === array();

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($noCharList) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
5670
5671
  /**
   * Makes string's first char uppercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    // upper-case only the first character ...
    $head = self::strtoupper(self::substr($str, 0, 1));

    // ... and keep everything behind it untouched
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
5682
5683 2
  /**
   * alias for "UTF8::ucfirst"
   *
   * @param string $str
   *
   * @return string
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
5694 2
5695 2
  /**
   * Uppercase for all words in the string.
   *
   * The string is split at single spaces; every word that is not listed in
   * $exceptions gets an upper-case first character. The first character of
   * the whole result is upper-cased in any case.
   *
   * @param  string $str
   * @param array   $exceptions Words (exact match) that should be left as they are.
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // check the length instead of the truthiness, "0" is a valid input
    if (!isset($str[0])) {
      return '';
    }

    // init
    $words = explode(' ', $str);
    $newwords = array();
    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
5736
5737
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // 1. plain url-decoding, "%uXXXX" sequences become hex entities
    $decoded = urldecode($str);
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $decoded);

    // ENT_HTML5 only exists since PHP 5.4
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // 2. force UTF-8, decode the entities, url-decode again and repair the result
    $decoded = self::to_utf8($decoded);
    $decoded = self::html_entity_decode($decoded, $flags);
    $decoded = rawurldecode($decoded);
    $decoded = self::fix_simple_utf8($decoded);

    return (string)$decoded;
  }
5777
5778
  /**
   * Return a array with "urlencoded"-win1252 -> UTF-8
   *
   * Keys are the percent-encoded bytes "%20" - "%FF", values are the matching
   * characters as UTF-8. Bytes that are not assigned in Windows-1252
   * (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to an empty string.
   *
   * "%80" maps to the EURO SIGN, as in the $win1252ToUtf8 table of this class.
   *
   * NOTE(review): "%7F" (DEL), "%A0" (NO-BREAK SPACE) and "%AD" (SOFT HYPHEN)
   * are written as explicit escape sequences, because the literal characters
   * are invisible in most editors - confirm that no caller relies on them
   * being mapped to an empty string.
   *
   * @return array
   */
  protected static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        // 0x20 - 0x7F (ASCII)
        '%20' => ' ', '%21' => '!', '%22' => '"', '%23' => '#', '%24' => '$', '%25' => '%', '%26' => '&', '%27' => "'",
        '%28' => '(', '%29' => ')', '%2A' => '*', '%2B' => '+', '%2C' => ',', '%2D' => '-', '%2E' => '.', '%2F' => '/',
        '%30' => '0', '%31' => '1', '%32' => '2', '%33' => '3', '%34' => '4', '%35' => '5', '%36' => '6', '%37' => '7',
        '%38' => '8', '%39' => '9', '%3A' => ':', '%3B' => ';', '%3C' => '<', '%3D' => '=', '%3E' => '>', '%3F' => '?',
        '%40' => '@', '%41' => 'A', '%42' => 'B', '%43' => 'C', '%44' => 'D', '%45' => 'E', '%46' => 'F', '%47' => 'G',
        '%48' => 'H', '%49' => 'I', '%4A' => 'J', '%4B' => 'K', '%4C' => 'L', '%4D' => 'M', '%4E' => 'N', '%4F' => 'O',
        '%50' => 'P', '%51' => 'Q', '%52' => 'R', '%53' => 'S', '%54' => 'T', '%55' => 'U', '%56' => 'V', '%57' => 'W',
        '%58' => 'X', '%59' => 'Y', '%5A' => 'Z', '%5B' => '[', '%5C' => '\\', '%5D' => ']', '%5E' => '^', '%5F' => '_',
        '%60' => '`', '%61' => 'a', '%62' => 'b', '%63' => 'c', '%64' => 'd', '%65' => 'e', '%66' => 'f', '%67' => 'g',
        '%68' => 'h', '%69' => 'i', '%6A' => 'j', '%6B' => 'k', '%6C' => 'l', '%6D' => 'm', '%6E' => 'n', '%6F' => 'o',
        '%70' => 'p', '%71' => 'q', '%72' => 'r', '%73' => 's', '%74' => 't', '%75' => 'u', '%76' => 'v', '%77' => 'w',
        '%78' => 'x', '%79' => 'y', '%7A' => 'z', '%7B' => '{', '%7C' => '|', '%7D' => '}', '%7E' => '~', '%7F' => "\x7f",
        // 0x80 - 0x9F (Windows-1252 specials)
        '%80' => "\xe2\x82\xac", '%81' => '', '%82' => '‚', '%83' => 'ƒ', '%84' => '„', '%85' => '…', '%86' => '†', '%87' => '‡',
        '%88' => 'ˆ', '%89' => '‰', '%8A' => 'Š', '%8B' => '‹', '%8C' => 'Œ', '%8D' => '', '%8E' => 'Ž', '%8F' => '',
        '%90' => '', '%91' => '‘', '%92' => '’', '%93' => '“', '%94' => '”', '%95' => '•', '%96' => '–', '%97' => '—',
        '%98' => '˜', '%99' => '™', '%9A' => 'š', '%9B' => '›', '%9C' => 'œ', '%9D' => '', '%9E' => 'ž', '%9F' => 'Ÿ',
        // 0xA0 - 0xFF (identical to ISO 8859-1)
        '%A0' => "\xc2\xa0", '%A1' => '¡', '%A2' => '¢', '%A3' => '£', '%A4' => '¤', '%A5' => '¥', '%A6' => '¦', '%A7' => '§',
        '%A8' => '¨', '%A9' => '©', '%AA' => 'ª', '%AB' => '«', '%AC' => '¬', '%AD' => "\xc2\xad", '%AE' => '®', '%AF' => '¯',
        '%B0' => '°', '%B1' => '±', '%B2' => '²', '%B3' => '³', '%B4' => '´', '%B5' => 'µ', '%B6' => '¶', '%B7' => '·',
        '%B8' => '¸', '%B9' => '¹', '%BA' => 'º', '%BB' => '»', '%BC' => '¼', '%BD' => '½', '%BE' => '¾', '%BF' => '¿',
        '%C0' => 'À', '%C1' => 'Á', '%C2' => 'Â', '%C3' => 'Ã', '%C4' => 'Ä', '%C5' => 'Å', '%C6' => 'Æ', '%C7' => 'Ç',
        '%C8' => 'È', '%C9' => 'É', '%CA' => 'Ê', '%CB' => 'Ë', '%CC' => 'Ì', '%CD' => 'Í', '%CE' => 'Î', '%CF' => 'Ï',
        '%D0' => 'Ð', '%D1' => 'Ñ', '%D2' => 'Ò', '%D3' => 'Ó', '%D4' => 'Ô', '%D5' => 'Õ', '%D6' => 'Ö', '%D7' => '×',
        '%D8' => 'Ø', '%D9' => 'Ù', '%DA' => 'Ú', '%DB' => 'Û', '%DC' => 'Ü', '%DD' => 'Ý', '%DE' => 'Þ', '%DF' => 'ß',
        '%E0' => 'à', '%E1' => 'á', '%E2' => 'â', '%E3' => 'ã', '%E4' => 'ä', '%E5' => 'å', '%E6' => 'æ', '%E7' => 'ç',
        '%E8' => 'è', '%E9' => 'é', '%EA' => 'ê', '%EB' => 'ë', '%EC' => 'ì', '%ED' => 'í', '%EE' => 'î', '%EF' => 'ï',
        '%F0' => 'ð', '%F1' => 'ñ', '%F2' => 'ò', '%F3' => 'ó', '%F4' => 'ô', '%F5' => 'õ', '%F6' => 'ö', '%F7' => '÷',
        '%F8' => 'ø', '%F9' => 'ù', '%FA' => 'ú', '%FB' => 'û', '%FC' => 'ü', '%FD' => 'ý', '%FE' => 'þ', '%FF' => 'ÿ',
    );

    return $array;
  }
6014
6015
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is forced to UTF-8 first, then the sequences listed in
   * self::$utf8ToWin1252 are replaced before the string is passed to
   * "Xml::utf8_decode()".
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // the search / replace lists are built once per request
    static $utf8ToWin1252Keys = null;
    static $utf8ToWin1252Values = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($str);

    if ($utf8ToWin1252Keys === null) {
      $utf8ToWin1252Keys = array_keys(self::$utf8ToWin1252);
      $utf8ToWin1252Values = array_values(self::$utf8ToWin1252);
    }

    $prepared = str_replace($utf8ToWin1252Keys, $utf8ToWin1252Values, $utf8);

    return Xml::utf8_decode($prepared);
  }
6045
6046
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * If the encoded string contains the byte 0xC2, the sequences listed in
   * self::$cp1252ToUtf8 are replaced afterwards.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // the search / replace lists are built once per request
    static $cp1252ToUtf8Keys = null;
    static $cp1252ToUtf8Values = null;

    $encoded = utf8_encode($str);

    // without a 0xC2 byte there is nothing to replace
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($cp1252ToUtf8Keys === null) {
      $cp1252ToUtf8Keys = array_keys(self::$cp1252ToUtf8);
      $cp1252ToUtf8Values = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($cp1252ToUtf8Keys, $cp1252ToUtf8Values, $encoded);
  }
6072
6073
  /**
   * fix -> utf8-win1252 chars
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6090
6091
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6105
6106
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string has to be
   * shortened, trailing whitespace is removed and $strAddOn is appended;
   * otherwise the string is returned unchanged.
   *
   * @param  string $str
   * @param  int    $words    Maximum number of words, values below 1 result in an empty string.
   * @param  string $strAddOn Appended to a shortened string.
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;

    // "{1,0}" (or a negative upper bound) is not a valid quantifier
    if ($words < 1) {
      return '';
    }

    preg_match('/^\s*+(?:\S++\s*+){1,' . $words . '}/u', $str, $matches);

    if (
        !isset($matches[0])
        ||
        self::strlen($str) === self::strlen($matches[0])
    ) {
      // no match at all or the whole string fits into the limit
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
6135
6136
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is translated into a single-byte "mask" (one byte per
   * character), the mask is wrapped with the native wordwrap() and the break
   * positions found in the mask are transferred back to the characters.
   *
   * @param string $str
   * @param int    $width
   * @param string $break
   * @param bool   $cut
   *
   * @return false|string Returns the given string wrapped at the specified length.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    // let the native function validate the parameters
    if (false === wordwrap('-', $width, $break, $cut)) {
      return false;
    }

    $segments = explode($break, $str);
    $segmentCount = count($segments);

    if (1 === $segmentCount && '' === $segments[0]) {
      return '';
    }

    // $chars: every character (existing breaks included, as one entry each)
    // $mask : "#" for an existing break, " " for a space, "?" for anything else
    $chars = array();
    $mask = '';

    foreach ($segments as $index => $segment) {

      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }
    unset($segments);

    $mask = wordwrap($mask, $width, '#', $cut);

    $result = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {

      // copy everything in front of the break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $result .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the break replaces an existing break or a space
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $result .= $break;
    }

    // whatever is left behind the last break
    return $result . implode('', $chars);
  }
6202
6203
  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6212
6213
}
6214