Completed
Push — master ( 807838...d46683 )
by Lars
08:06 queued 21s
created

UTF8::chr_to_hex()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2
Metric Value
dl 0
loc 4
ccs 0
cts 2
cp 0
rs 10
cc 1
eloc 2
nc 1
nop 2
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
use Patchwork\PHP\Shim\Intl;
6
use Patchwork\PHP\Shim\Normalizer;
7
use Patchwork\PHP\Shim\Xml;
8
9
/**
10
 * UTF8-Helper-Class
11
 *
12
 * @package voku\helper
13
 */
14
class UTF8
15
{
16
  /**
17
   * @var array
18
   */
19
  protected static $win1252ToUtf8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
  );
48
49
  /**
50
   * @var array
51
   */
52
  protected static $cp1252ToUtf8 = array(
53
      '€' => '€',
54
      '‚' => '‚',
55
      'ƒ' => 'ƒ',
56
      '„' => '„',
57
      '…' => '…',
58
      '†' => '†',
59
      '‡' => '‡',
60
      'ˆ' => 'ˆ',
61
      '‰' => '‰',
62
      'Š' => 'Š',
63
      '‹' => '‹',
64
      'Œ' => 'Œ',
65
      'Ž' => 'Ž',
66
      '‘' => '‘',
67
      '’' => '’',
68
      '“' => '“',
69
      '”' => '”',
70
      '•' => '•',
71
      '–' => '–',
72
      '—' => '—',
73
      '˜' => '˜',
74
      '™' => '™',
75
      'š' => 'š',
76
      '›' => '›',
77
      'œ' => 'œ',
78
      'ž' => 'ž',
79
      'Ÿ' => 'Ÿ',
80
  );
81
82
  /**
83
   * Numeric Code Point => UTF-8 Character
84
   *
85
   * @var array
86
   */
87
  protected static $whitespace = array(
88
      0     => "\x0",
89
      //NUL Byte
90
      9     => "\x9",
91
      //Tab
92
      10    => "\xa",
93
      //New Line
94
      11    => "\xb",
95
      //Vertical Tab
96
      13    => "\xd",
97
      //Carriage Return
98
      32    => "\x20",
99
      //Ordinary Space
100
      160   => "\xc2\xa0",
101
      //NO-BREAK SPACE
102
      5760  => "\xe1\x9a\x80",
103
      //OGHAM SPACE MARK
104
      6158  => "\xe1\xa0\x8e",
105
      //MONGOLIAN VOWEL SEPARATOR
106
      8192  => "\xe2\x80\x80",
107
      //EN QUAD
108
      8193  => "\xe2\x80\x81",
109
      //EM QUAD
110
      8194  => "\xe2\x80\x82",
111
      //EN SPACE
112
      8195  => "\xe2\x80\x83",
113
      //EM SPACE
114
      8196  => "\xe2\x80\x84",
115
      //THREE-PER-EM SPACE
116
      8197  => "\xe2\x80\x85",
117
      //FOUR-PER-EM SPACE
118
      8198  => "\xe2\x80\x86",
119
      //SIX-PER-EM SPACE
120
      8199  => "\xe2\x80\x87",
121
      //FIGURE SPACE
122
      8200  => "\xe2\x80\x88",
123
      //PUNCTUATION SPACE
124
      8201  => "\xe2\x80\x89",
125
      //THIN SPACE
126
      8202  => "\xe2\x80\x8a",
127
      //HAIR SPACE
128
      8232  => "\xe2\x80\xa8",
129
      //LINE SEPARATOR
130
      8233  => "\xe2\x80\xa9",
131
      //PARAGRAPH SEPARATOR
132
      8239  => "\xe2\x80\xaf",
133
      //NARROW NO-BREAK SPACE
134
      8287  => "\xe2\x81\x9f",
135
      //MEDIUM MATHEMATICAL SPACE
136
      12288 => "\xe3\x80\x80"
137
      //IDEOGRAPHIC SPACE
138
  );
139
140
  /**
141
   * @var array
142
   */
143
  protected static $whitespaceTable = array(
144
      'SPACE'                     => "\x20",
145
      'NO-BREAK SPACE'            => "\xc2\xa0",
146
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
147
      'EN QUAD'                   => "\xe2\x80\x80",
148
      'EM QUAD'                   => "\xe2\x80\x81",
149
      'EN SPACE'                  => "\xe2\x80\x82",
150
      'EM SPACE'                  => "\xe2\x80\x83",
151
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
152
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
153
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
154
      'FIGURE SPACE'              => "\xe2\x80\x87",
155
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
156
      'THIN SPACE'                => "\xe2\x80\x89",
157
      'HAIR SPACE'                => "\xe2\x80\x8a",
158
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
159
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
160
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
161
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  protected static $commonCaseFold = array(
168
      'ſ'            => 's',
169
      "\xCD\x85"     => 'ι',
170
      'ς'            => 'σ',
171
      "\xCF\x90"     => 'β',
172
      "\xCF\x91"     => 'θ',
173
      "\xCF\x95"     => 'φ',
174
      "\xCF\x96"     => 'π',
175
      "\xCF\xB0"     => 'κ',
176
      "\xCF\xB1"     => 'ρ',
177
      "\xCF\xB5"     => 'ε',
178
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
179
      "\xE1\xBE\xBE" => 'ι',
180
  );
181
182
  /**
183
   * @var array
184
   */
185
  protected static $brokenUtf8ToUtf8 = array(
186
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
187
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
188
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
189
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
190
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
191
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
192
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
193
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
194
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
195
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
196
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
197
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
198
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
199
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
200
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
201
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
202
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
203
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
204
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
205
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
206
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
207
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
208
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
209
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
210
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
211
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
212
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
213
      'ü'       => 'ü',
214
      'ä'       => 'ä',
215
      'ö'       => 'ö',
216
      'Ö'       => 'Ö',
217
      'ß'       => 'ß',
218
      'Ã '       => 'à',
219
      'á'       => 'á',
220
      'â'       => 'â',
221
      'ã'       => 'ã',
222
      'ù'       => 'ù',
223
      'ú'       => 'ú',
224
      'û'       => 'û',
225
      'Ù'       => 'Ù',
226
      'Ú'       => 'Ú',
227
      'Û'       => 'Û',
228
      'Ü'       => 'Ü',
229
      'ò'       => 'ò',
230
      'ó'       => 'ó',
231
      'ô'       => 'ô',
232
      'è'       => 'è',
233
      'é'       => 'é',
234
      'ê'       => 'ê',
235
      'ë'       => 'ë',
236
      'À'       => 'À',
237
      'Á'       => 'Á',
238
      'Â'       => 'Â',
239
      'Ã'       => 'Ã',
240
      'Ä'       => 'Ä',
241
      'Ã…'       => 'Å',
242
      'Ç'       => 'Ç',
243
      'È'       => 'È',
244
      'É'       => 'É',
245
      'Ê'       => 'Ê',
246
      'Ë'       => 'Ë',
247
      'ÃŒ'       => 'Ì',
248
      'Í'       => 'Í',
249
      'ÃŽ'       => 'Î',
250
      'Ï'       => 'Ï',
251
      'Ñ'       => 'Ñ',
252
      'Ã’'       => 'Ò',
253
      'Ó'       => 'Ó',
254
      'Ô'       => 'Ô',
255
      'Õ'       => 'Õ',
256
      'Ø'       => 'Ø',
257
      'Ã¥'       => 'å',
258
      'æ'       => 'æ',
259
      'ç'       => 'ç',
260
      'ì'       => 'ì',
261
      'í'       => 'í',
262
      'î'       => 'î',
263
      'ï'       => 'ï',
264
      'ð'       => 'ð',
265
      'ñ'       => 'ñ',
266
      'õ'       => 'õ',
267
      'ø'       => 'ø',
268
      'ý'       => 'ý',
269
      'ÿ'       => 'ÿ',
270
      '€'      => '€',
271
  );
272
273
  /**
274
   * @var array
275
   */
276
  protected static $utf8ToWin1252 = array(
277
      "\xe2\x82\xac" => "\x80", // EURO SIGN
278
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
279
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
280
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
281
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
282
      "\xe2\x80\xa0" => "\x86", // DAGGER
283
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
284
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
285
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
286
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
287
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
288
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
289
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
290
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
291
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
292
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
293
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
294
      "\xe2\x80\xa2" => "\x95", // BULLET
295
      "\xe2\x80\x93" => "\x96", // EN DASH
296
      "\xe2\x80\x94" => "\x97", // EM DASH
297
      "\xcb\x9c"     => "\x98", // SMALL TILDE
298
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
299
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
300
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
301
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
302
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
303
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
304
  );
305
306
  /**
307
   * @var array
308
   */
309
  protected static $utf8MSWord = array(
310
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
311
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
312
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
313
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
314
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
315
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
316
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
317
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
318
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
319
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
320
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
321
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
322
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
323
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
324
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
325
  );
326
327
  /**
328
   * @var array
329
   */
330
  private static $support = array();
331
332
  /**
   * Creating an instance triggers the UTF-8 environment check
   * implemented in checkForSupport().
   */
  public function __construct()
  {
    self::checkForSupport();
  }
339
340
  /**
   * Probe the UTF-8 related capabilities of the running PHP and boot the
   * portability layer.
   *
   * The probe results are stored in self::$support. The body is skipped as
   * soon as self::$support already holds a 'mbstring' entry, so repeated
   * calls are cheap.
   */
  public static function checkForSupport()
  {
    if (isset(self::$support['mbstring'])) {
      // already probed by an earlier call
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();

    // Enables the portability layer and configures PHP for UTF-8
    Bootup::initAll();
    // Redirects to an UTF-8 encoded URL if it's not already the case
    Bootup::filterRequestUri();
    // Normalizes HTTP inputs to UTF-8 NFC
    Bootup::filterRequestInputs();
  }
357
358
  /**
   * Report whether the "mbstring" extension is loaded.
   *
   * Side effect: when the extension is present, its internal encoding is
   * switched to UTF-8.
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    mb_internal_encoding('UTF-8');

    return true;
  }
373
374
  /**
   * Report whether the "iconv" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function iconv_loaded()
  {
    $loaded = (bool)extension_loaded('iconv');

    return $loaded;
  }
383
384
  /**
   * Report whether the "intl" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intl_loaded()
  {
    $loaded = (bool)extension_loaded('intl');

    return $loaded;
  }
393
394
  /**
   * Check whether the "u" pattern modifier (Unicode / UTF-8 mode of PCRE)
   * is usable, by running an empty pattern that carries the modifier.
   *
   * Any error raised by the probe itself is silenced.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
404
405
  /**
   * camelCase alias of "UTF8::to_ascii()": both arguments are passed on
   * unchanged and the result is returned as-is.
   *
   * @param string $s The input string e.g. a UTF-8 String
   * @param string $subst_chr
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
417
418
  /**
   * convert to ASCII
   *
   * The input is cast to string and passed through clean(). If bytes >= 0x80
   * remain, the string is NFKC-normalized and every multi-byte character is
   * replaced by the first of these that yields a result:
   *  1. iconv transliteration to ASCII,
   *  2. the "translit_extra" data table (loaded once via getData()),
   *  3. the leading ASCII byte of the character's NFD form, else $subst_chr,
   *  4. str_transliterate(), tried only while the result still is exactly "?".
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr substitute used when step 3 finds no ASCII byte
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // pure ASCII: nothing to transliterate
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = Normalizer::normalize($s, Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // "s" modifier: "." has to match line breaks as well, otherwise every
    // "\n" is missing from the list and therefore from the imploded result
    preg_match_all('/./us', $s, $matches);
    $chars = isset($matches[0]) ? $matches[0] : array();

    foreach ($chars as &$c) {

      // single-byte characters are already ASCII
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if (is_string($t)) {
          if (!isset($t[0])) {
            $t = '?';
          } elseif (isset($t[1])) {
            // strip the accent marks some iconv implementations put in front
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      if ($t === false) {
        // iconv failed: go through the fallbacks instead of silently
        // dropping the character
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = Normalizer::normalize($c, Normalizer::NFD);

          if (is_string($t) && isset($t[0]) && $t[0] < "\x80") {
            // keep only the ASCII base character of the decomposition
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference left behind by the loop
    unset($c);

    return implode('', $chars);
  }
497
498
  /**
   * accepts a string and removes all non-UTF-8 characters from it.
   *
   * Well-formed sequences of one to four bytes are kept, every other byte is
   * dropped. Afterwards the replacement character (U+FFFD) and the characters
   * matched by remove_invisible_characters() are removed as well; the
   * remaining steps are optional and controlled by the flags.
   *
   * @param string $str                  The string to be sanitized.
   * @param bool   $remove_bom           true: also run removeBOM()
   * @param bool   $normalize_whitespace true: also run normalize_whitespace()
   * @param bool   $normalize_msword     true: also run normalize_msword(), e.g.: "…" => "..."
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings, hence the {1,50} limit

    $regx = '/
       (
        (?: [\x00-\x7F]                      # single-byte sequences   0xxxxxxx
        |   [\xC2-\xDF][\x80-\xBF]           # double-byte sequences   110xxxxx 10xxxxxx
        |   \xE0[\xA0-\xBF][\x80-\xBF]       # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xE1-\xEC][\x80-\xBF]{2}
        |   \xED[\x80-\x9F][\x80-\xBF]
        |   [\xEE-\xEF][\x80-\xBF]{2}
        |   \xF0[\x90-\xBF][\x80-\xBF]{2}    # four-byte sequences     11110xxx 10xxxxxx * 3
        |   [\xF1-\xF3][\x80-\xBF]{3}
        |   \xF4[\x80-\x8F][\x80-\xBF]{2}
        ){1,50}                              # ...one or more times
       )
       | .                                      # anything else
       /x';
    // group 1 is empty for the "anything else" branch, so such bytes vanish
    $str = preg_replace($regx, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
544
545
  /**
   * Replace every "diamond question mark" (�) found in the string.
   *
   * @param string $str     the string to search in
   * @param string $unknown the text put in place of each occurrence
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    $search = array("\xEF\xBF\xBD", '�');
    $replace = array($unknown, $unknown);

    return str_replace($search, $replace, $str);
  }
567
568
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * Removed are all control characters except horizontal tab (dec 09),
   * newline (dec 10) and carriage return (dec 13), plus DEL (dec 127).
   * With $url_encoded their percent-encoded forms ("%00" ... "%1f") are
   * removed too, in either letter case. The replacement is repeated until
   * nothing matches any more, so sequences that only appear after a removal
   * are caught as well.
   *
   * copy & paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded also strip the percent-encoded forms
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    if ($url_encoded) {
      // "i" modifier: the hex digits of a percent-escape may be written in
      // upper case ("%0B"), which a case-sensitive pattern would let through
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
601
602
  /**
   * Replace every character listed in self::$whitespaceTable with an
   * ordinary space (" ").
   *
   * The list of characters is extracted from the table on the first call
   * and reused afterwards.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_whitespace($str)
  {
    static $needles = null;

    if (null === $needles) {
      $needles = array_values(self::$whitespaceTable);
    }

    $normalized = str_replace($needles, ' ', $str);

    return $normalized;
  }
619
620
  /**
   * Hand out the whitespace table (self::$whitespaceTable), as per
   * http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array the whitespace characters as values, keyed by the name of
   *         the respective whitespace type
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
633
634
  /**
   * Replace the special characters listed in self::$utf8MSWord (typographic
   * quotes, dashes, ellipsis) with their plain replacements from that table.
   *
   * Search and replacement lists are derived from the table on the first
   * call and reused afterwards.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    static $search = null;
    static $replace = null;

    if (null === $search) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    $normalized = str_replace($search, $replace, $str);

    return $normalized;
  }
653
654
  /**
   * remove the BOM from UTF-8 / UTF-16 / UTF-32
   *
   * The byte order marks are tested one after another in a fixed order, and
   * each one found at the (current) start of the string is cut off.
   *
   * @param string $str
   *
   * @return string the input without leading BOM; "" when nothing else is left
   */
  public static function removeBOM($str = '')
  {
    // The order matters: the UTF-32 (LE) mark "FF FE 00 00" begins with the
    // UTF-16 (LE) mark "FF FE" and therefore has to be tested first.
    static $boms = array(
        "\x00\x00\xfe\xff", // UTF-32 (BE)
        "\xff\xfe\x00\x00", // UTF-32 (LE)
        "\xef\xbb\xbf",     // UTF-8
        "\xfe\xff",         // UTF-16 (BE)
        "\xff\xfe",         // UTF-16 (LE)
    );

    foreach ($boms as $bom) {
      $length = strlen($bom);

      // binary-safe, strict prefix test
      if (strncmp($str, $bom, $length) === 0) {
        // substr() yields false on older PHP versions when nothing remains
        // after the BOM; the cast keeps the return value a string
        $str = (string)substr($str, $length);
      }
    }

    return $str;
  }
691
692
  /**
   * Load "<name>.ser" from the "data" directory next to this file and
   * return its unserialized content.
   *
   * @param string $file name of the data file, without directory and extension
   *
   * @return bool|string|array|int false on error
   */
  protected static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.ser';

    if (!file_exists($path)) {
      return false;
    }

    return unserialize(file_get_contents($path));
  }
708
709
  /**
710
   * US-ASCII transliterations of Unicode text
711
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
712
   * Warning: you should only pass this well formed UTF-8!
713
   * Be aware it works by making a copy of the input string which it appends transliterated
714
   * characters to - it uses a PHP output buffer to do this - it means, memory use will increase,
715
   * requiring up to the same amount again as the input string
716
   *
717
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
718
   *
719
   * @author <[email protected]>
720
   *
721
   * @param string $str     UTF-8 string to convert
722
   * @param string $unknown Character use if character unknown (default to ?)
723
   *
724
   * @return string US-ASCII string
725
   */
726 8
  public static function str_transliterate($str, $unknown = '?')
  {
    // Transliteration tables, loaded lazily and kept for the rest of the
    // request. Indexed by "bank" (code point >> 8), then by the low byte.
    // NOTE(review): the files under data/ are presumably expected to fill
    // $UTF8_TO_ASCII[$bank] in this scope when included, so this variable
    // name must stay as it is - confirm against the data files.
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    // split the cleaned string into single UTF-8 characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = isset($ar[0]) ? $ar[0] : array();

    foreach ($chars as &$c) {

      $lead = ord($c[0]);

      // ASCII - next please
      if ($lead < 128) {
        continue;
      }

      // Derive, from the lead byte alone, how many continuation bytes follow
      // and which payload bits the lead byte contributes. $ord is assigned
      // afresh for every character, so a value decoded for a previous
      // character can never leak into this one.
      if ($lead >= 192 && $lead <= 223) {
        $extra = 1;
        $ord = $lead - 192;
      } elseif ($lead >= 224 && $lead <= 239) {
        $extra = 2;
        $ord = $lead - 224;
      } elseif ($lead >= 240 && $lead <= 247) {
        $extra = 3;
        $ord = $lead - 240;
      } elseif ($lead >= 248 && $lead <= 251) {
        $extra = 4;
        $ord = $lead - 248;
      } elseif ($lead >= 252 && $lead <= 253) {
        $extra = 5;
        $ord = $lead - 252;
      } else {
        // 128-191 is a continuation byte without a lead byte,
        // 254/255 never appear in UTF-8
        $c = $unknown;
        continue;
      }

      // truncated sequence: do not read past the end of the character
      if (!isset($c[$extra])) {
        $c = $unknown;
        continue;
      }

      // every continuation byte contributes its lower 6 bits
      for ($i = 1; $i <= $extra; $i++) {
        $ord = $ord * 64 + (ord($c[$i]) - 128);
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          /** @noinspection PhpIncludeInspection */
          include $bankfile;
        }

        // no table file, or the file did not define this bank:
        // remember an empty table so the lookup below stays valid
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
818
819
  /**
820
   * echo native UTF8-Support libs
821
   */
822
  public static function showSupport()
  {
    // print every entry of the support table, one per line,
    // each followed by a newline and an HTML line break
    foreach (self::$support as $supportEntry) {
      echo $supportEntry, "\n<br>";
    }
  }
828
829
  /**
830
   * UTF-8 version of htmlentities()
831
   *
832
   * Convert all applicable characters to HTML entities
833
   *
834
   * @link http://php.net/manual/en/function.htmlentities.php
835
   *
836
   * @param string $string        <p>
837
   *                              The input string.
838
   *                              </p>
839
   * @param int    $flags         [optional] <p>
840
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
841
   *                              invalid code unit sequences and the used document type. The default is
842
   *                              ENT_COMPAT | ENT_HTML401.
843
   *                              <table>
844
   *                              Available <i>flags</i> constants
845
   *                              <tr valign="top">
846
   *                              <td>Constant Name</td>
847
   *                              <td>Description</td>
848
   *                              </tr>
849
   *                              <tr valign="top">
850
   *                              <td><b>ENT_COMPAT</b></td>
851
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
852
   *                              </tr>
853
   *                              <tr valign="top">
854
   *                              <td><b>ENT_QUOTES</b></td>
855
   *                              <td>Will convert both double and single quotes.</td>
856
   *                              </tr>
857
   *                              <tr valign="top">
858
   *                              <td><b>ENT_NOQUOTES</b></td>
859
   *                              <td>Will leave both double and single quotes unconverted.</td>
860
   *                              </tr>
861
   *                              <tr valign="top">
862
   *                              <td><b>ENT_IGNORE</b></td>
863
   *                              <td>
864
   *                              Silently discard invalid code unit sequences instead of returning
865
   *                              an empty string. Using this flag is discouraged as it
866
   *                              may have security implications.
867
   *                              </td>
868
   *                              </tr>
869
   *                              <tr valign="top">
870
   *                              <td><b>ENT_SUBSTITUTE</b></td>
871
   *                              <td>
872
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
873
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
874
   *                              </td>
875
   *                              </tr>
876
   *                              <tr valign="top">
877
   *                              <td><b>ENT_DISALLOWED</b></td>
878
   *                              <td>
879
   *                              Replace invalid code points for the given document type with a
880
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
881
   *                              (otherwise) instead of leaving them as is. This may be useful, for
882
   *                              instance, to ensure the well-formedness of XML documents with
883
   *                              embedded external content.
884
   *                              </td>
885
   *                              </tr>
886
   *                              <tr valign="top">
887
   *                              <td><b>ENT_HTML401</b></td>
888
   *                              <td>
889
   *                              Handle code as HTML 4.01.
890
   *                              </td>
891
   *                              </tr>
892
   *                              <tr valign="top">
893
   *                              <td><b>ENT_XML1</b></td>
894
   *                              <td>
895
   *                              Handle code as XML 1.
896
   *                              </td>
897
   *                              </tr>
898
   *                              <tr valign="top">
899
   *                              <td><b>ENT_XHTML</b></td>
900
   *                              <td>
901
   *                              Handle code as XHTML.
902
   *                              </td>
903
   *                              </tr>
904
   *                              <tr valign="top">
905
   *                              <td><b>ENT_HTML5</b></td>
906
   *                              <td>
907
   *                              Handle code as HTML 5.
908
   *                              </td>
909
   *                              </tr>
910
   *                              </table>
911
   *                              </p>
912
   * @param string $encoding      [optional] <p>
913
   *                              Like <b>htmlspecialchars</b>,
914
   *                              <b>htmlentities</b> takes an optional third argument
915
   *                              <i>encoding</i> which defines encoding used in
916
   *                              conversion.
917
   *                              Although this argument is technically optional, you are highly
918
   *                              encouraged to specify the correct value for your code.
919
   *                              </p>
920
   * @param bool   $double_encode [optional] <p>
921
   *                              When <i>double_encode</i> is turned off PHP will not
922
   *                              encode existing html entities. The default is to convert everything.
923
   *                              </p>
924
   *
925
   *
926
   * @return string the encoded string.
927
   * </p>
928
   * <p>
929
   * If the input <i>string</i> contains an invalid code unit
930
   * sequence within the given <i>encoding</i> an empty string
931
   * will be returned, unless either the <b>ENT_IGNORE</b> or
932
   * <b>ENT_SUBSTITUTE</b> flags are set.
933
   */
934 1
  public static function htmlentities($string, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // thin wrapper: the only difference to the native function is that the
    // encoding defaults to UTF-8 and the flags default to ENT_COMPAT
    $encoded = htmlentities($string, $flags, $encoding, $double_encode);

    return $encoded;
  }
938
939
  /**
940
   * UTF-8 version of htmlspecialchars()
941
   *
942
   * Convert special characters to HTML entities
943
   *
944
   * @link http://php.net/manual/en/function.htmlspecialchars.php
945
   *
946
   * @param string $string        <p>
947
   *                              The string being converted.
948
   *                              </p>
949
   * @param int    $flags         [optional] <p>
950
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
951
   *                              invalid code unit sequences and the used document type. The default is
952
   *                              ENT_COMPAT | ENT_HTML401.
953
   *                              <table>
954
   *                              Available <i>flags</i> constants
955
   *                              <tr valign="top">
956
   *                              <td>Constant Name</td>
957
   *                              <td>Description</td>
958
   *                              </tr>
959
   *                              <tr valign="top">
960
   *                              <td><b>ENT_COMPAT</b></td>
961
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
962
   *                              </tr>
963
   *                              <tr valign="top">
964
   *                              <td><b>ENT_QUOTES</b></td>
965
   *                              <td>Will convert both double and single quotes.</td>
966
   *                              </tr>
967
   *                              <tr valign="top">
968
   *                              <td><b>ENT_NOQUOTES</b></td>
969
   *                              <td>Will leave both double and single quotes unconverted.</td>
970
   *                              </tr>
971
   *                              <tr valign="top">
972
   *                              <td><b>ENT_IGNORE</b></td>
973
   *                              <td>
974
   *                              Silently discard invalid code unit sequences instead of returning
975
   *                              an empty string. Using this flag is discouraged as it
976
   *                              may have security implications.
977
   *                              </td>
978
   *                              </tr>
979
   *                              <tr valign="top">
980
   *                              <td><b>ENT_SUBSTITUTE</b></td>
981
   *                              <td>
982
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
983
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
984
   *                              </td>
985
   *                              </tr>
986
   *                              <tr valign="top">
987
   *                              <td><b>ENT_DISALLOWED</b></td>
988
   *                              <td>
989
   *                              Replace invalid code points for the given document type with a
990
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
991
   *                              (otherwise) instead of leaving them as is. This may be useful, for
992
   *                              instance, to ensure the well-formedness of XML documents with
993
   *                              embedded external content.
994
   *                              </td>
995
   *                              </tr>
996
   *                              <tr valign="top">
997
   *                              <td><b>ENT_HTML401</b></td>
998
   *                              <td>
999
   *                              Handle code as HTML 4.01.
1000
   *                              </td>
1001
   *                              </tr>
1002
   *                              <tr valign="top">
1003
   *                              <td><b>ENT_XML1</b></td>
1004
   *                              <td>
1005
   *                              Handle code as XML 1.
1006
   *                              </td>
1007
   *                              </tr>
1008
   *                              <tr valign="top">
1009
   *                              <td><b>ENT_XHTML</b></td>
1010
   *                              <td>
1011
   *                              Handle code as XHTML.
1012
   *                              </td>
1013
   *                              </tr>
1014
   *                              <tr valign="top">
1015
   *                              <td><b>ENT_HTML5</b></td>
1016
   *                              <td>
1017
   *                              Handle code as HTML 5.
1018
   *                              </td>
1019
   *                              </tr>
1020
   *                              </table>
1021
   *                              </p>
1022
   * @param string $encoding      [optional] <p>
1023
   *                              Defines encoding used in conversion.
1024
   *                              </p>
1025
   *                              <p>
1026
   *                              For the purposes of this function, the encodings
1027
   *                              ISO-8859-1, ISO-8859-15,
1028
   *                              UTF-8, cp866,
1029
   *                              cp1251, cp1252, and
1030
   *                              KOI8-R are effectively equivalent, provided the
1031
   *                              <i>string</i> itself is valid for the encoding, as
1032
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1033
   *                              the same positions in all of these encodings.
1034
   *                              </p>
1035
   * @param bool   $double_encode [optional] <p>
1036
   *                              When <i>double_encode</i> is turned off PHP will not
1037
   *                              encode existing html entities, the default is to convert everything.
1038
   *                              </p>
1039
   *
1040
   * @return string The converted string.
1041
   * </p>
1042
   * <p>
1043
   * If the input <i>string</i> contains an invalid code unit
1044
   * sequence within the given <i>encoding</i> an empty string
1045
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1046
   * <b>ENT_SUBSTITUTE</b> flags are set.
1047
   */
1048 1
  public static function htmlspecialchars($string, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // thin wrapper: the only difference to the native function is that the
    // encoding defaults to UTF-8 and the flags default to ENT_COMPAT
    $converted = htmlspecialchars($string, $flags, $encoding, $double_encode);

    return $converted;
  }
1052
1053
  /**
1054
   * alias for "UTF8::is_utf8"
1055
   *
1056
   * @param string $str
1057
   *
1058
   * @return bool
1059
   */
1060 16
  public static function isUtf8($str)
  {
    // camelCase alias, the work is done by is_utf8()
    $isValid = self::is_utf8($str);

    return $isValid;
  }
1064
1065
  /**
1066
   * checks whether the passed string contains only byte sequences that
1067
   * appear valid UTF-8 characters.
1068
   *
1069
   * @see    http://hsivonen.iki.fi/php-utf8/
1070
   *
1071
   * @since  1.0
1072
   *
1073
   * @param    string $str The string to be checked
1074
   *
1075
   * @return   bool True if the check succeeds, False Otherwise
1076
   */
1077 31
  public static function is_utf8($str)
  {
    $str = (string)$str;

    // an empty string is trivially valid
    if (!isset($str[0])) {
      return true;
    }

    // The validation is a plain byte-wise state machine, so it does not need
    // PCRE at all. An earlier revision used a /u regular expression exactly
    // when self::pcre_utf8_support() was NOT true.
    // NOTE(review): presumably that is the one situation in which the /u
    // modifier cannot work - confirm against pcre_utf8_support().

    $pending = 0;   // continuation octets still expected for the current sequence
    $codePoint = 0; // code point assembled so far
    $seqLength = 1; // total number of octets of the current sequence
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($pending === 0) {
        // Between sequences: expect US-ASCII or the first octet of a
        // multi-octet sequence.
        if ($in < 0x80) {
          // US-ASCII, pass straight through.
          continue;
        }

        if (($in & 0xE0) === 0xC0) {
          // First octet of 2 octet sequence
          $codePoint = ($in & 0x1F) << 6;
          $pending = 1;
          $seqLength = 2;
        } elseif (($in & 0xF0) === 0xE0) {
          // First octet of 3 octet sequence
          $codePoint = ($in & 0x0F) << 12;
          $pending = 2;
          $seqLength = 3;
        } elseif (($in & 0xF8) === 0xF0) {
          // First octet of 4 octet sequence
          $codePoint = ($in & 0x07) << 18;
          $pending = 3;
          $seqLength = 4;
        } else {
          // Either a continuation octet without a lead octet, an octet that
          // never occurs in UTF-8 (0xFE / 0xFF), or the first octet of a 5 or
          // 6 octet sequence. The latter is always illegal: the encoded code
          // point is either not in shortest form or outside 0-0x10FFFF.
          return false;
        }

        continue;
      }

      // Inside a sequence: only a continuation octet (10xxxxxx) is legal.
      if (($in & 0xC0) !== 0x80) {
        return false;
      }

      $pending--;
      $codePoint |= ($in & 0x3F) << ($pending * 6);

      if ($pending === 0) {
        // End of the multi-octet sequence, $codePoint is complete.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $seqLength && $codePoint < 0x0080) ||
            (3 === $seqLength && $codePoint < 0x0800) ||
            (4 === $seqLength && $codePoint < 0x10000) ||
            // From Unicode 3.2, surrogate characters are illegal
            ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) ||
            // Codepoints outside the Unicode range are illegal
            ($codePoint > 0x10FFFF)
        ) {
          return false;
        }
      }
    }

    // A string that stops in the middle of a multi-octet sequence is
    // incomplete and therefore not valid UTF-8.
    return $pending === 0;
  }
1196
1197
  /**
1198
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
1199
   * mask.
1200
   *
1201
   * @param string $s
1202
   * @param string $mask
1203
   * @param int    $start
1204
   * @param int    $len
1205
   *
1206
   * @return int|null
1207
   */
1208 6
  public static function strspn($s, $mask, $start = 0, $len = 2147483647)
  {
    // only cut the string when a non-default range was requested
    $isDefaultRange = !$start && 2147483647 == $len;
    if (!$isDefaultRange) {
      $s = self::substr($s, $start, $len);
    }

    // anchor at the beginning and match as many mask characters as possible
    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $s, $found)) {
      return 0;
    }

    return self::strlen($found[0]);
  }
1216
1217
  /**
1218
   * Get part of string
1219
   *
1220
   * @link http://php.net/manual/en/function.mb-substr.php
1221
   *
1222
   * @param string  $str       <p>
1223
   *                           The string being checked.
1224
   *                           </p>
1225
   * @param int     $start     <p>
1226
   *                           The first position used in str.
1227
   *                           </p>
1228
   * @param int     $length    [optional] <p>
1229
   *                           The maximum length of the returned string.
1230
   *                           </p>
1231
   * @param string  $encoding
1232
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
1233
   *
1234
   * @return string mb_substr returns the portion of
1235
   * str specified by the start and length parameters.
1236
   */
1237 37
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // cached result of the probe for the grapheme_substr() defect (see below)
    static $bug62759;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr

      $str = self::clean($str);
    }

    // "no length" means "up to the end of the string"
    if ($length === null) {
      $length = (int)self::strlen($str);
    } else {
      $length = (int)$length;
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      // NOTE(review): presumably a boolean once was accepted at this
      // parameter position - confirm against older releases
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return mb_substr($str, $start, $length, $encoding);
    }

    // NOTE(review): this branch is guarded by the "iconv" flag but calls
    // grapheme_substr(), which looks like an intl function - confirm the
    // intended flag. The function_exists() check keeps the guard as it was
    // and only makes sure that a missing function leads to the pure PHP
    // fallback below instead of a fatal error.
    if (self::$support['iconv'] === true && function_exists('grapheme_substr')) {

      // probe once whether grapheme_substr() mishandles this call
      if (!isset($bug62759)) {
        $bug62759 = ('à' === grapheme_substr('éà', 1, -2));
      }

      if ($bug62759) {
        return (string)Intl::grapheme_substr_workaround62759($str, $start, $length);
      }

      return (string)grapheme_substr($str, $start, $length);
    }

    // fallback

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($array, $start, $length));
  }
1294
1295
  /**
1296
   * Get string length
1297
   *
1298
   * @link     http://php.net/manual/en/function.mb-strlen.php
1299
   *
1300
   * @param string  $string    The string being checked for length.
1301
   * @param string  $encoding  Set the charset for e.g. "mb_" function
1302
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
1303
   *
1304
   * @return int the number of characters in
1305
   *           string str having character encoding
1306
   *           encoding. A multi-byte character is
1307
   *           counted as 1.
1308
   */
1309 63
  public static function strlen($string, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $string = (string)$string;

    // nothing to count
    if ($string === '') {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    // cleaning is only done for UTF-8 input and only on explicit request
    $shouldClean = ($encoding === 'UTF-8' && $cleanUtf8 === true);

    return mb_strlen($shouldClean ? self::clean($string) : $string, $encoding);
  }
1333
1334
  /**
1335
   * convert a string to an array of Unicode characters.
1336
   *
1337
   * @param    string  $str       The string to split into array.
1338
   * @param    int     $length    Max character length of each array element
1339
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string
1340
   *
1341
   * @return   array An array containing chunks of the string
1342
   */
1343 24
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // let PCRE do the splitting, one match per character
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: walk over the bytes and group them by looking at the bit
      // pattern of the lead byte and of the bytes following it; anything
      // that does not form a complete sequence is left out of the result

      $byteCount = strlen($str);

      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // single byte
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && (isset($str[$pos + 1]))) {
          // two bytes
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && (isset($str[$pos + 2]))) {
          // three bytes
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && (isset($str[$pos + 3]))) {
          // four bytes
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    // glue single characters together to chunks of the requested size
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
1411
1412
  /**
1413
   * rxClass
1414
   *
1415
   * @param string $s
1416
   * @param string $class
1417
   *
1418
   * @return string
1419
   */
1420 40
  protected static function rxClass($s, $class = '')
  {
    // results already built during this request
    static $rxClassCache = array();

    // The length prefix keeps the key unambiguous: plain concatenation would
    // give e.g. ('s\\', '') and ('s', '\\') the same key although they lead
    // to different expressions.
    $cacheKey = strlen($s) . ':' . $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // content of the bracket expression; $class is taken over unquoted
    $bracketContent = $class;
    // clusters which can not be part of a bracket expression
    $alternatives = array();

    foreach (self::str_split($s) as $cluster) {
      if ('-' === $cluster) {
        // a hyphen is only literal at the very beginning of the class
        $bracketContent = '-' . $bracketContent;
      } elseif (!isset($cluster[2])) {
        // at most two bytes long: may contain a regex meta character
        $bracketContent .= preg_quote($cluster, '/');
      } elseif (1 === self::strlen($cluster)) {
        // one single character of three or more bytes: nothing to quote
        $bracketContent .= $cluster;
      } else {
        // Several characters forming one cluster: matched as an alternative
        // next to the bracket expression. It is quoted because meta
        // characters are active at that position.
        $alternatives[] = preg_quote($cluster, '/');
      }
    }

    $return = '[' . $bracketContent . ']';

    if (count($alternatives) > 0) {
      $return = '(?:' . $return . '|' . implode('|', $alternatives) . ')';
    }

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
1456
1457
  /**
1458
   * Convert a string to an array
1459
   *
1460
   * @param string $string
1461
   * @param int    $len
1462
   *
1463
   * @return array
1464
   */
1465 17
  public static function str_split($string, $len = 1)
  {
    // init
    self::checkForSupport();

    // a chunk size below 1 is handed over to the native function
    $len = (int)$len;
    if ($len < 1) {
      $len = func_get_arg(1);

      return str_split($string, $len);
    }

    if (self::$support['intl'] === true) {
      // pull one cluster after the other out of the string; the extract
      // call moves $offset forward through its last (by reference) argument
      $clusters = array();
      $offset = 0;
      $byteLength = strlen($string);
      while ($offset < $byteLength) {
        $clusters[] = grapheme_extract($string, 1, GRAPHEME_EXTR_COUNT, $offset, $offset);
      }
    } else {
      preg_match_all('/' . GRAPHEME_CLUSTER_RX . '/u', $string, $matched);
      $clusters = $matched[0];
    }

    if (1 == $len) {
      return $clusters;
    }

    // join $len consecutive clusters to one element each
    $arrayOutput = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len) {
        // inside of a chunk: append
        $arrayOutput[$chunkIndex] .= $cluster;
      } else {
        // first cluster of a chunk: open a new element
        $chunkIndex++;
        $arrayOutput[$chunkIndex] = $cluster;
      }
    }

    return $arrayOutput;
  }
1506
1507
  /**
1508
   * return width of string
1509
   *
1510
   * @param string $s
1511
   *
1512
   * @return int
1513
   */
1514 1
  public static function strwidth($s)
  {
    // Delegates to mb_strwidth() with the encoding fixed to UTF-8.
    self::checkForSupport();

    $width = mb_strwidth($s, 'UTF-8');

    return $width;
  }
1521
1522
  /**
1523
   * Find length of initial segment not matching mask
1524
   *
1525
   * @param string $str
1526
   * @param string $charlist
1527
   * @param int    $start
1528
   * @param int    $len
1529
   *
1530
   * @return int|null
1531
   */
1532 5
  public static function strcspn($str, $charlist, $start = 0, $len = 2147483647)
  {
    // Returns the number of characters at the start of $str (optionally restricted
    // to the $start/$len slice) that precede the first character from $charlist.
    // An empty $charlist yields null.
    $charlist .= '';
    if ('' === $charlist) {
      return null;
    }

    // 2147483647 is the "no length given" sentinel from the signature.
    $needsSlice = $start || 2147483647 != $len;
    $str = $needsSlice ? (string)self::substr($str, $start, $len) : (string)$str;

    $matches = array();
    $pattern = '/^(.*?)' . self::rxClass($charlist) . '/us';

    if (!preg_match($pattern, $str, $matches)) {
      // no character of $charlist occurs: the whole string qualifies
      return self::strlen($str);
    }

    // group 1 is the lazily matched prefix in front of the first hit
    return self::strlen($matches[1]);
  }
1551
1552
  /**
1553
   * checks if the number of Unicode characters in a string are not
1554
   * more than the specified integer.
1555
   *
1556
   * @param    string $str      The original string to be checked.
1557
   * @param    int    $box_size The size in number of chars to be checked against string.
1558
   *
1559
   * @return   bool true if string is less than or equal to $box_size The
1560
   *           false otherwise
1561
   */
1562 1
  public static function fits_inside($str, $box_size)
  {
    // True when $str is at most $box_size characters long (as counted by self::strlen()).
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1566
1567
  /**
1568
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
1569
   *
1570
   * @param string $string
1571
   * @param string $needle
1572
   * @param bool   $before_needle
1573
   *
1574
   * @return false|string
1575
   */
1576 7
  public static function stristr($string, $needle, $before_needle = false)
  {
    // Case-insensitive search via mb_stristr() in UTF-8; an empty needle short-circuits to false.
    $needle .= '';
    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    $found = mb_stristr($string, $needle, $before_needle, 'UTF-8');

    return $found;
  }
1587
1588
  /**
1589
   * Case insensitive string comparisons using a "natural order" algorithm
1590
   *
1591
   * @param string $str1
1592
   * @param string $str2
1593
   *
1594
   * @return int Similar to other string comparison functions, this one returns < 0 if str1 is less than str2 > 0 if
1595
   *             str1 is greater than str2, and 0 if they are equal.
1596
   */
1597 1
  public static function strnatcasecmp($str1, $str2)
  {
    // Case-fold both operands first, then compare them with self::strnatcmp().
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
1601
1602
  /**
1603
   * String comparisons using a "natural order" algorithm
1604
   *
1605
   * @param string $str1
1606
   * @param string $str2
1607
   *
1608
   * @return int Similar to other string comparison functions, this one returns < 0 if str1 is less than str2; > 0 if
1609
   *             str1 is greater than str2, and 0 if they are equal.
1610
   */
1611 2
  public static function strnatcmp($str1, $str2)
  {
    // Identical strings compare equal without any folding work.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise run the native natural-order comparison on the folded forms.
    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
1615
1616
  /**
1617
   * generic case sensitive transformation for collation matching
1618
   *
1619
   * @param string $s
1620
   *
1621
   * @return string
1622
   */
1623 3
  protected static function strtonatfold($s)
  {
    // Decompose to NFD, then remove every run of non-spacing marks (\p{Mn}).
    $decomposed = Normalizer::normalize($s, Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
1627
1628
  /**
1629
   * Unicode transformation for caseless matching
1630
   *
1631
   * @link http://unicode.org/reports/tr21/tr21-5.html
1632
   *
1633
   * @param string $string
1634
   * @param bool   $full
1635
   *
1636
   * @return string
1637
   */
1638 10
  public static function strtocasefold($string, $full = true)
  {
    // Lookup tables are built once per request and kept in static locals.
    static $fullFoldTable = null;
    static $commonSearch = null;
    static $commonReplace = null;

    if (null === $commonSearch) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    // Step 1: the common case-fold mappings.
    $string = str_replace($commonSearch, $commonReplace, $string);

    if (!$full) {
      return self::strtolower($string);
    }

    // Step 2 (only when $full): the "caseFolding_full" data set, index 0 = search, index 1 = replace.
    if (null === $fullFoldTable) {
      $fullFoldTable = self::getData('caseFolding_full');
    }

    /** @noinspection OffsetOperationsInspection */
    $string = str_replace($fullFoldTable[0], $fullFoldTable[1], $string);

    // Step 3: lowercase whatever is left.
    return self::strtolower($string);
  }
1663
1664
  /**
1665
   * (PHP 4 &gt;= 4.3.0, PHP 5)<br/>
1666
   * Make a string lowercase
1667
   *
1668
   * @link http://php.net/manual/en/function.mb-strtolower.php
1669
   *
1670
   * @param string $str <p>
1671
   *                    The string being lowercased.
1672
   *                    </p>
1673
   * @param string $encoding
1674
   *
1675
   * @return string str with all alphabetic characters converted to lowercase.
1676
   */
1677 19
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    // Lowercases $str with mb_strtolower(); empty input returns '' without touching mbstring.
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    $lowered = mb_strtolower($str, $encoding);

    return $lowered;
  }
1690
1691
  /**
1692
   * urldecode & fixing urlencoded-win1252-chars
1693
   *
1694
   * @since 1.0.4
1695
   *
1696
   * @param string $str
1697
   *
1698
   * @return string
1699
   */
1700 1
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // URL-decode, then rewrite "%uXXXX" escapes as hexadecimal HTML entities.
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', urldecode($str));

    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // Pipeline: to UTF-8 -> decode entities -> raw URL-decode -> repair broken UTF-8.
    $asUtf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($asUtf8, $flags);
    $rawDecoded = rawurldecode($withoutEntities);
    $repaired = self::fix_simple_utf8($rawDecoded);

    return (string)$repaired;
  }
1723
1724
  /**
1725
   * fix a broken UTF-8 string
1726
   *
1727
   * @param string $str
1728
   *
1729
   * @return string
1730
   */
1731 8
  public static function fix_simple_utf8($str)
  {
    // Search/replace lists derived once from self::$brokenUtf8ToUtf8 (key => broken, value => fixed).
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if (null === $brokenSequences) {
      $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
      $fixedSequences = array_values(self::$brokenUtf8ToUtf8);
    }

    $repaired = str_replace($brokenSequences, $fixedSequences, $str);

    return $repaired;
  }
1749
1750
  /**
1751
   *
1752
   * UTF-8 version of html_entity_decode()
1753
   *
1754
   * The reason we are not using html_entity_decode() by itself is because
1755
   * while it is not technically correct to leave out the semicolon
1756
   * at the end of an entity most browsers will still interpret the entity
1757
   * correctly. html_entity_decode() does not convert entities without
1758
   * semicolons, so we are left with our own little solution here. Bummer.
1759
   *
1760
   * Convert all HTML entities to their applicable characters
1761
   *
1762
   * @link http://php.net/manual/en/function.html-entity-decode.php
1763
   *
1764
   * @param string $string   <p>
1765
   *                         The input string.
1766
   *                         </p>
1767
   * @param int    $flags    [optional] <p>
1768
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1769
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1770
   *                         <table>
1771
   *                         Available <i>flags</i> constants
1772
   *                         <tr valign="top">
1773
   *                         <td>Constant Name</td>
1774
   *                         <td>Description</td>
1775
   *                         </tr>
1776
   *                         <tr valign="top">
1777
   *                         <td><b>ENT_COMPAT</b></td>
1778
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1779
   *                         </tr>
1780
   *                         <tr valign="top">
1781
   *                         <td><b>ENT_QUOTES</b></td>
1782
   *                         <td>Will convert both double and single quotes.</td>
1783
   *                         </tr>
1784
   *                         <tr valign="top">
1785
   *                         <td><b>ENT_NOQUOTES</b></td>
1786
   *                         <td>Will leave both double and single quotes unconverted.</td>
1787
   *                         </tr>
1788
   *                         <tr valign="top">
1789
   *                         <td><b>ENT_HTML401</b></td>
1790
   *                         <td>
1791
   *                         Handle code as HTML 4.01.
1792
   *                         </td>
1793
   *                         </tr>
1794
   *                         <tr valign="top">
1795
   *                         <td><b>ENT_XML1</b></td>
1796
   *                         <td>
1797
   *                         Handle code as XML 1.
1798
   *                         </td>
1799
   *                         </tr>
1800
   *                         <tr valign="top">
1801
   *                         <td><b>ENT_XHTML</b></td>
1802
   *                         <td>
1803
   *                         Handle code as XHTML.
1804
   *                         </td>
1805
   *                         </tr>
1806
   *                         <tr valign="top">
1807
   *                         <td><b>ENT_HTML5</b></td>
1808
   *                         <td>
1809
   *                         Handle code as HTML 5.
1810
   *                         </td>
1811
   *                         </tr>
1812
   *                         </table>
1813
   *                         </p>
1814
   * @param string $encoding [optional] <p>
1815
   *                         Encoding to use.
1816
   *                         </p>
1817
   *
1818
   * @return string the decoded string.
1819
   */
1820 13
  public static function html_entity_decode($string, $flags = null, $encoding = 'UTF-8')
  {
    $string = (string)$string;

    // Nothing to decode: empty input, or no ampersand anywhere.
    if ('' === $string || strpos($string, '&') === false) {
      return $string;
    }

    if (null === $flags) {
      $flags = ENT_COMPAT;
      if (Bootup::is_php('5.4') === true) {
        $flags |= ENT_HTML5;
      }
    }

    // Decode repeatedly until a pass changes nothing, so nested entities
    // such as "&amp;lt;" are resolved completely.
    do {
      $previous = $string;

      // add the missing ";" to numeric entities so the native decoder accepts them
      $withSemicolons = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS',
          '$1;',
          $string
      );

      $string = html_entity_decode($withSemicolons, $flags, $encoding);
    } while ($previous !== $string);

    return $string;
  }
1853
1854
  /**
1855
   * Function UTF8::to_utf8
1856
   *
1857
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
1858
   *
1859
   * It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
1860
   *
1861
   * It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
1862
   *
1863
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞß
1864
   *    are followed by any of these:  ("group B")
1865
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
1866
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
1867
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
1868
   * is also a valid unicode character, and will be left unchanged.
1869
   *
1870
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
1871
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
1872
   *
1873
   * @name               to_utf8
1874
   *
1875
   * @param string       $text Any string or array.
1876
   *
1877
   * @return string|array The same string (or array of strings), UTF-8 encoded
1878
   *
1879
   */
1880 20
  public static function to_utf8($text)
  {
    // Arrays are converted element by element (recursively); keys are kept.
    if (is_array($text)) {
      foreach ($text as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $text[$k] = self::to_utf8($v);
      }

      return $text;
    }

    $text = (string)$text;

    if (!isset($text[0])) {
      return $text;
    }

    // The loop below indexes $text byte by byte, so $max is used as a byte count.
    $max = self::strlen($text, '8bit');

    $buf = '';
    for ($i = 0; $i < $max; $i++) {
      $c1 = $text[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already
        // look-ahead bytes; "\x00" stands in for "past the end" and never counts as a tail byte
        $c2 = $i + 1 >= $max ? "\x00" : $text[$i + 1];
        $c3 = $i + 2 >= $max ? "\x00" : $text[$i + 2];
        $c4 = $i + 3 >= $max ? "\x00" : $text[$i + 3];

        $c2IsTail = $c2 >= "\x80" && $c2 <= "\xbf";
        $c3IsTail = $c3 >= "\x80" && $c3 <= "\xbf";
        $c4IsTail = $c4 >= "\x80" && $c4 <= "\xbf";

        // number of tail bytes of a well-formed sequence starting at $c1 (0 = not UTF-8)
        $tailBytes = 0;
        if ($c1 <= "\xdf") { // looks like 2 bytes UTF8
          if ($c2IsTail) {
            $tailBytes = 1;
          }
        } elseif ($c1 <= "\xef") { // looks like 3 bytes UTF8
          if ($c2IsTail && $c3IsTail) {
            $tailBytes = 2;
          }
        } elseif ($c1 <= "\xf7") { // looks like 4 bytes UTF8
          if ($c2IsTail && $c3IsTail && $c4IsTail) {
            $tailBytes = 3;
          }
        }
        // 0xf8-0xff never starts a sequence here, so $tailBytes stays 0

        if ($tailBytes > 0) { // yeah, almost sure it's UTF8 already
          $buf .= $c1 . $c2;
          if ($tailBytes > 1) {
            $buf .= $c3;
          }
          if ($tailBytes > 2) {
            $buf .= $c4;
          }
          $i += $tailBytes;

          continue;
        }
      } elseif (($c1 & "\xc0") === "\x80") { // stray byte 0x80-0xbf: needs conversion
        $win1252Key = ord($c1);
        if (isset(self::$win1252ToUtf8[$win1252Key])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$win1252Key];

          continue;
        }
      } else { // plain ASCII: it doesn't need conversion
        $buf .= $c1;

        continue;
      }

      // Not valid UTF-8: re-encode the single byte as a two-byte UTF-8 sequence.
      // Integer shift/mask replaces chr(ord($c1) / 64), which handed a fractional
      // float to chr() (deprecated implicit conversion since PHP 8.1).
      $ordC1 = ord($c1);
      $buf .= chr(0xc0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3f));
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
1985
1986
  /**
1987
   * alias for "UTF8::to_utf8"
1988
   *
1989
   * @param string $text
1990
   *
1991
   * @return string
1992
   */
1993
  public static function toUTF8($text)
  {
    // camelCase alias: forwards unchanged to self::to_utf8().
    $converted = self::to_utf8($text);

    return $converted;
  }
1997
1998
  /**
1999
   * try to check if a string is a json-string
2000
   *
2001
   * @param $string
2002
   *
2003
   * @return bool
2004
   *
2005
   * @deprecated
2006
   */
2007
  public static function isJson($string)
  {
    $string = (string)$string;

    if ('' === $string) {
      return false;
    }

    // Only input that decodes to an object counts; JSON arrays and scalars yield false.
    $decoded = json_decode($string);

    return is_object($decoded) && json_last_error() == JSON_ERROR_NONE;
  }
2025
2026
  /**
2027
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
2028
   *
2029
   * @link http://php.net/manual/en/function.grapheme-strstr.php
2030
   *
2031
   * @param string $haystack      <p>
2032
   *                              The input string. Must be valid UTF-8.
2033
   *                              </p>
2034
   * @param string $needle        <p>
2035
   *                              The string to look for. Must be valid UTF-8.
2036
   *                              </p>
2037
   * @param bool   $before_needle [optional] <p>
2038
   *                              If <b>TRUE</b>, grapheme_strstr() returns the part of the
2039
   *                              haystack before the first occurrence of the needle (excluding the needle).
2040
   *                              </p>
2041
   *
2042
   * @return string the portion of string, or FALSE if needle is not found.
2043
   */
2044 1
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // Thin wrapper around grapheme_strstr().
    self::checkForSupport();

    $portion = grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
2050
2051
  /**
2052
   * Reads entire file into a string | !!! WARNING: do not use the UTF-8 option for binary files (e.g.: images)
2053
   *
2054
   * @link http://php.net/manual/en/function.file-get-contents.php
2055
   *
2056
   * @param string   $filename      <p>
2057
   *                                Name of the file to read.
2058
   *                                </p>
2059
   * @param int      $flags         [optional] <p>
2060
   *                                Prior to PHP 6, this parameter is called
2061
   *                                use_include_path and is a bool.
2062
   *                                As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
2063
   *                                to trigger include path
2064
   *                                search.
2065
   *                                </p>
2066
   *                                <p>
2067
   *                                The value of flags can be any combination of
2068
   *                                the following flags (with some restrictions), joined with the
2069
   *                                binary OR (|)
2070
   *                                operator.
2071
   *                                </p>
2072
   *                                <p>
2073
   *                                <table>
2074
   *                                Available flags
2075
   *                                <tr valign="top">
2076
   *                                <td>Flag</td>
2077
   *                                <td>Description</td>
2078
   *                                </tr>
2079
   *                                <tr valign="top">
2080
   *                                <td>
2081
   *                                FILE_USE_INCLUDE_PATH
2082
   *                                </td>
2083
   *                                <td>
2084
   *                                Search for filename in the include directory.
2085
   *                                See include_path for more
2086
   *                                information.
2087
   *                                </td>
2088
   *                                </tr>
2089
   *                                <tr valign="top">
2090
   *                                <td>
2091
   *                                FILE_TEXT
2092
   *                                </td>
2093
   *                                <td>
2094
   *                                As of PHP 6, the default encoding of the read
2095
   *                                data is UTF-8. You can specify a different encoding by creating a
2096
   *                                custom context or by changing the default using
2097
   *                                stream_default_encoding. This flag cannot be
2098
   *                                used with FILE_BINARY.
2099
   *                                </td>
2100
   *                                </tr>
2101
   *                                <tr valign="top">
2102
   *                                <td>
2103
   *                                FILE_BINARY
2104
   *                                </td>
2105
   *                                <td>
2106
   *                                With this flag, the file is read in binary mode. This is the default
2107
   *                                setting and cannot be used with FILE_TEXT.
2108
   *                                </td>
2109
   *                                </tr>
2110
   *                                </table>
2111
   *                                </p>
2112
   * @param resource $context       [optional] <p>
2113
   *                                A valid context resource created with
2114
   *                                stream_context_create. If you don't need to use a
2115
   *                                custom context, you can skip this parameter by &null;.
2116
   *                                </p>
2117
   * @param int      $offset        [optional] <p>
2118
   *                                The offset where the reading starts.
2119
   *                                </p>
2120
   * @param int      $maxlen        [optional] <p>
2121
   *                                Maximum length of data read. The default is to read until end
2122
   *                                of file is reached.
2123
   *                                </p>
2124
   * @param int      $timeout
2125
   *
2126
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non
2127
   *                                default utf-8 chars
2128
   *
2129
   * @return string|false The function returns the read data or false on failure.
2130
   */
2131 2
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // Reads $filename with the native file_get_contents() and, unless
    // $convertToUtf8 is switched off, converts the data to UTF-8 and runs it
    // through self::cleanup(). Returns false when the read fails.

    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and rewrites
    // quotes/tags inside the name; kept as-is because callers may rely on it.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // Apply the timeout through an "http" stream context, but only if the caller supplied no context.
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function declares these two as non-nullable; cast explicitly
    // (null => false / 0, same result as the implicit coercion) to avoid the
    // PHP 8.1 "passing null" deprecation.
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // str_detect_encoding() yields false when nothing could be detected;
      // in that case there is no valid source encoding to convert from.
      $encoding = self::str_detect_encoding($data);
      if ($encoding !== false && $encoding !== 'UTF-8') {
        $data = mb_convert_encoding($data, 'UTF-8', $encoding);
      }

      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
2173
2174
  /**
2175
   * optimized "mb_detect_encoding()"-function -> with UTF-16 and UTF-32 support
2176
   *
2177
   * @param string $str
2178
   *
2179
   * @return bool|string false if we can't detect the string-encoding
2180
   */
2181 2
  public static function str_detect_encoding($str)
  {
    // Detection order: byte-order mark, then mb_detect_encoding(), then the
    // is_utf16()/is_utf32() heuristics for input that is_binary() flags.
    // Returns the encoding name, or false if nothing could be determined.

    // BOM table. The 4-byte UTF-32 marks must be tested before the 2-byte
    // UTF-16 marks, because the UTF-32LE mark starts with the UTF-16LE mark.
    $bomMap = array(
        "\xef\xbb\xbf"     => 'UTF-8',
        "\x00\x00\xfe\xff" => 'UTF-32BE',
        "\xff\xfe\x00\x00" => 'UTF-32LE',
        "\xfe\xff"         => 'UTF-16BE',
        "\xff\xfe"         => 'UTF-16LE',
    );

    foreach ($bomMap as $bom => $bomEncoding) {
      if (strncmp($str, $bom, strlen($bom)) === 0) {
        return $bomEncoding;
      }
    }

    self::checkForSupport();

    // For UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always.
    $detectOrder = array(
        'UTF-8',
        'windows-1251',
        'ISO-8859-1',
    );
    $encoding = mb_detect_encoding($str, $detectOrder, true);

    if (self::is_binary($str)) {
      // each heuristic is evaluated once: 1 = little endian, 2 = big endian
      $utf16 = self::is_utf16($str);
      if ($utf16 == 1) {
        return 'UTF-16LE';
      }
      if ($utf16 == 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 == 1) {
        return 'UTF-32LE';
      }
      if ($utf32 == 2) {
        return 'UTF-32BE';
      }
    }

    if (!$encoding) {
      $encoding = false;
    }

    return $encoding;
  }
2246
2247
  /**
   * Heuristically decide whether the input looks like binary data.
   *
   * The input is reported as binary when it consists solely of the
   * characters "0" and "1", or when it contains at least one NUL byte.
   *
   * NOTE(review): the last check counts occurrences of the literal four-byte
   * substring '^ -~' (substr_count() does not take a pattern). Its ratio to the
   * input length can never exceed 0.25, so that check cannot pass the 0.3
   * threshold; it is kept as-is to leave behaviour untouched.
   *
   * @param string $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    // a string made only of "0" / "1" digits
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // any NUL byte
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    $byteLength = strlen($input);
    if ($byteLength && substr_count($input, '^ -~') / $byteLength > 0.3) {
      return true;
    }

    return false;
  }
2271
2272
  /**
   * Guess whether a binary string is UTF-16 and which byte order it uses.
   *
   * For each byte order the input is converted to UTF-8, back to UTF-16 and
   * to UTF-8 again; when the round trip is stable a score is accumulated per
   * distinct character of the result. The byte order with the higher score wins.
   *
   * NOTE(review): in_array() compares each character (array key) against the
   * occurrence counts (array values) of the input; this mirrors the existing
   * behaviour and is deliberately left unchanged here.
   *
   * @param string $string
   *
   * @return int|false false if no UTF-16 byte order could be singled out,
   *                   1 for UTF-16LE, 2 for UTF-16BE
   */
  public static function is_utf16($string)
  {
    if (!self::is_binary($string)) {
      return false;
    }

    self::checkForSupport();

    // evaluated in this order: little endian first, then big endian
    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array_keys($scores) as $candidate) {
      $decoded = mb_convert_encoding($string, 'UTF-8', $candidate);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      $reEncoded = mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip != $decoded) {
        continue;
      }

      $inputChars = self::count_chars($string);
      foreach (self::count_chars($roundTrip) as $char => $unusedCount) {
        if (in_array($char, $inputChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-16BE'] == $scores['UTF-16LE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
2326
2327
  /**
   * Count how often each character occurs in a string.
   *
   * The string is split with self::split() and the pieces are tallied;
   * the result is sorted by key (the character).
   *
   * @param    string $str The input string
   *
   * @return   array An associative array with characters as keys and
   *           their number of occurrences as values
   */
  public static function count_chars($str) //there is no $mode parameters
  {
    $characters = self::split($str);
    $occurrences = array_count_values($characters);

    ksort($occurrences);

    return $occurrences;
  }
2343
2344
  /**
   * Guess whether a binary string is UTF-32 and which byte order it uses.
   *
   * For each byte order the input is converted to UTF-8, back to UTF-32 and
   * to UTF-8 again; when the round trip is stable a score is accumulated per
   * distinct character of the result. The byte order with the higher score wins.
   *
   * NOTE(review): in_array() compares each character (array key) against the
   * occurrence counts (array values) of the input; this mirrors the existing
   * behaviour and is deliberately left unchanged here.
   *
   * @param string $string
   *
   * @return int|false false if no UTF-32 byte order could be singled out,
   *                   1 for UTF-32LE, 2 for UTF-32BE
   */
  public static function is_utf32($string)
  {
    if (!self::is_binary($string)) {
      return false;
    }

    self::checkForSupport();

    // evaluated in this order: little endian first, then big endian
    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array_keys($scores) as $candidate) {
      $decoded = mb_convert_encoding($string, 'UTF-8', $candidate);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      $reEncoded = mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip != $decoded) {
        continue;
      }

      $inputChars = self::count_chars($string);
      foreach (self::count_chars($roundTrip) as $char => $unusedCount) {
        if (in_array($char, $inputChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-32BE'] == $scores['UTF-32LE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
2398
2399
  /**
   * Clean up a UTF-8 string so that only printable characters remain.
   *
   * The text is first passed through self::fix_simple_utf8() (repairs
   * ISO <-> UTF-8 errors) and then through self::clean(), which per the
   * original notes removes non UTF-8 symbols, the replacement character,
   * invisible characters (e.g. "\0") and the BOM, and normalizes whitespace.
   *
   * @param string|false $text
   *
   * @return string the cleaned text, '' for empty input
   */
  public static function cleanup($text)
  {
    $text = (string)$text;

    if ($text === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $repaired = self::fix_simple_utf8($text);

    return (string)self::clean($repaired, true, true, false);
  }
2429
2430
  /**
   * Check whether a file looks binary, judged by its first 512 bytes.
   *
   * The leading block is read and handed to self::is_binary(). If the file
   * cannot be opened or read, an empty block is tested instead.
   *
   * @param string $file path of the file to inspect
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      // fopen() signals failure by returning false, it does not throw;
      // the catch below only triggers when an error handler converts
      // warnings into exceptions.
      $fp = fopen($file, 'rb');
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      $block = '';
    }

    // always release the handle, even if reading failed
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2449
2450
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around mb_strrchr().
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle
   * @param string $needle   The string to find in haystack
   * @param bool   $part     [optional] If true, return all of haystack from the
   *                         beginning to the last occurrence of needle; if false,
   *                         return all of haystack from the last occurrence of
   *                         needle to the end
   * @param string $encoding [optional] Character encoding name to use
   *
   * @return string|false the portion of haystack, or false if needle is not found
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
2484
2485
  /**
   * Finds the last occurrence of a character in a string within another,
   * case insensitive.
   *
   * Thin wrapper around mb_strrichr().
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle
   * @param string $needle   The string to find in haystack
   * @param bool   $part     [optional] If true, return all of haystack from the
   *                         beginning to the last occurrence of needle; if false,
   *                         return all of haystack from the last occurrence of
   *                         needle to the end
   * @param string $encoding [optional] Character encoding name to use
   *
   * @return string|false the portion of haystack, or false if needle is not found
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
2519
2520
  /**
   * Run the native filter_var() and pass its result through self::filter().
   *
   * The $option argument is only forwarded to filter_var() when the caller
   * actually supplied three arguments.
   *
   * @param      $var
   * @param int  $filter
   * @param null $option
   *
   * @return mixed|string
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    $optionGiven = func_num_args() >= 3;

    if ($optionGiven) {
      $filtered = filter_var($var, $filter, $option);
    } else {
      $filtered = filter_var($var, $filter);
    }

    return self::filter($filtered);
  }
2539
2540
  /**
   * Normalizes to UTF-8 NFC, converting from CP-1252 when needed.
   *
   * Arrays and objects are processed recursively, element by element /
   * property by property. Strings get their line endings unified to "\n";
   * strings containing bytes >= 0x80 are additionally normalized, and a
   * leading combining mark is prefixed with $leading_combining. Any other
   * type is returned untouched.
   *
   * @param        $var
   * @param int    $normalization_form form handed to Normalizer (default 4,
   *                                   presumably NFC - confirm against Normalizer)
   * @param string $leading_combining  prefix put in front of a leading combining mark
   *
   * @return mixed|string
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure 7-bit strings need no normalization
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (Normalizer::isNormalized($var, $normalization_form)) {
      // marker: non-empty so the leading-combining check below still applies
      $n = '-';
    } else {
      $n = Normalizer::normalize($var, $normalization_form);

      // an empty normalization result means the input has to be re-encoded first
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    if ($var[0] >= "\x80" && isset($n[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
2592
2593
  /**
   * Encode to UTF-8 or LATIN1.
   *
   * INFO: unlike "UTF8::utf8_encode()" this function also tries to fix broken /
   *       double encoding, so it can be called on a string that is already
   *       UTF-8 without messing it up.
   *
   * The label is resolved via self::normalizeEncoding() and dispatched to
   * self::to_utf8() or self::to_latin1().
   *
   * @param string $encodingLabel ISO-8859-1 || UTF-8
   * @param string $text
   *
   * @return string|false false when the normalized label is neither of the two
   */
  public static function encode($encodingLabel, $text)
  {
    switch (self::normalizeEncoding($encodingLabel)) {
      case 'UTF-8':
        return self::to_utf8($text);

      case 'ISO-8859-1':
        return self::to_latin1($text);

      default:
        return false;
    }
  }
2618
2619
  /**
   * Normalize an encoding name to either "ISO-8859-1" or "UTF-8".
   *
   * The label is upper-cased and stripped of everything except letters,
   * digits and whitespace before it is looked up in the alias table.
   * Unknown labels fall back to "UTF-8".
   *
   * @param string $encodingLabel e.g.: ISO, UTF8, ISO88591, WIN1252 ...
   *
   * @return string
   */
  protected static function normalizeEncoding($encodingLabel)
  {
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
    );

    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));

    return isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : 'UTF-8';
  }
2647
2648
  /**
   * Convert to latin1; delegates to self::to_win1252().
   *
   * @param $text
   *
   * @return string
   */
  public static function to_latin1($text)
  {
    $converted = self::to_win1252($text);

    return $converted;
  }
2659
2660
  /**
   * Convert to win1252.
   *
   * Strings are decoded with self::utf8_decode(), arrays are converted
   * element by element (recursively, keys preserved); any other value is
   * returned unchanged.
   *
   * @param  string|array $text
   *
   * @return string
   */
  protected static function to_win1252($text)
  {
    if (is_string($text)) {
      return self::utf8_decode($text);
    }

    if (!is_array($text)) {
      return $text;
    }

    foreach (array_keys($text) as $key) {
      $text[$key] = self::to_win1252($text[$key]);
    }

    return $text;
  }
2683
2684
  /**
   * Decode a UTF-8 string.
   *
   * The input is first brought to UTF-8 via self::to_utf8(); the sequences
   * listed in self::$utf8ToWin1252 are then replaced by their mapped values
   * and the result is handed to Xml::utf8_decode().
   *
   * @param string $string
   *
   * @return string the decoded string, '' for empty input
   */
  public static function utf8_decode($string)
  {
    // lookup tables are derived once and reused across calls
    static $search = null;
    static $replace = null;

    $string = (string)$string;

    if ($string === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($string);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $utf8);

    return Xml::utf8_decode($mapped);
  }
2714
2715
  /**
   * Run the native filter_input() and pass its result through self::filter().
   *
   * The $option argument is only forwarded to filter_input() when the caller
   * actually supplied four arguments.
   *
   * @param      $type
   * @param      $var
   * @param int  $filter
   * @param null $option
   *
   * @return mixed|string
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $optionGiven = func_num_args() >= 4;

    if ($optionGiven) {
      $filtered = filter_input($type, $var, $filter, $option);
    } else {
      $filtered = filter_input($type, $var, $filter);
    }

    return self::filter($filtered);
  }
2735
2736
  /**
   * Encode a string with the native utf8_encode() and afterwards replace the
   * sequences listed in self::$cp1252ToUtf8 by their mapped values.
   *
   * The replacement step is skipped when the encoded string contains no
   * "\xC2" byte.
   *
   * @param string $string
   *
   * @return string
   */
  public static function utf8_encode($string)
  {
    // lookup tables are derived once and reused across calls
    static $search = null;
    static $replace = null;

    $encoded = utf8_encode($string);

    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
2762
2763
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through self::filter() before encoding. The $depth
   * argument is only forwarded when Bootup::is_php('5.5') reports support.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   The value being encoded. Can be any type except a
   *                       resource. All string data must be UTF-8 encoded.
   * @param int   $options [optional] Bitmask of JSON_* encode constants
   *                       (JSON_HEX_QUOT, JSON_HEX_TAG, JSON_HEX_AMP,
   *                       JSON_HEX_APOS, JSON_NUMERIC_CHECK, JSON_PRETTY_PRINT,
   *                       JSON_UNESCAPED_SLASHES, JSON_FORCE_OBJECT,
   *                       JSON_UNESCAPED_UNICODE)
   * @param int   $depth   [optional] Maximum depth. Must be greater than zero.
   *
   * @return string|false a JSON encoded string on success or FALSE on failure
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
2811
2812
  /**
   * Decodes a JSON string.
   *
   * The input is passed through self::filter() before decoding. The $options
   * argument is only forwarded when Bootup::is_php('5.4') reports support.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    The json string being decoded; only UTF-8 encoded
   *                        strings are supported.
   * @param bool   $assoc   [optional] When TRUE, returned objects will be
   *                        converted into associative arrays.
   * @param int    $depth   [optional] User specified recursion depth.
   * @param int    $options [optional] Bitmask of JSON decode options
   *                        (e.g. JSON_BIGINT_AS_STRING).
   *
   * @return mixed the value encoded in json in appropriate PHP type; NULL is
   *               returned if the json cannot be decoded or if the encoded
   *               data is deeper than the recursion limit.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
2860
2861
  /**
   * Run the native filter_input_array() and pass its result through
   * self::filter().
   *
   * $def and $add_empty are only forwarded when the caller supplied at
   * least two arguments.
   *
   * @param      $type
   * @param null $def
   * @param bool $add_empty
   *
   * @return mixed|string
   */
  public static function filter_input_array($type, $def = null, $add_empty = true)
  {
    $definitionGiven = func_num_args() >= 2;

    if ($definitionGiven) {
      $input = filter_input_array($type, $def, $add_empty);
    } else {
      $input = filter_input_array($type);
    }

    return self::filter($input);
  }
2880
2881
  /**
   * Search a string for any of a set of characters.
   *
   * A character class is built from $charlist via self::rxClass(); the
   * remainder of $s starting at the first matched character is returned.
   *
   * @param string $s
   * @param string $charlist
   *
   * @return string|false false when none of the characters occurs in $s
   */
  public static function strpbrk($s, $charlist)
  {
    $pattern = '/' . self::rxClass($charlist) . '/us';

    if (!preg_match($pattern, $s, $match)) {
      return false;
    }

    $offset = strpos($s, $match[0]);

    return substr($s, $offset);
  }
2897
2898
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are case-folded with self::strtocasefold() and then
   * compared with self::strncmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
2911
2912
  /**
   * Comparison of the first n characters.
   *
   * Both strings are cut with self::substr() and compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncmp($str1, $str2, $len)
  {
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
2925
2926
  /**
   * String comparison.
   *
   * Identical strings short-circuit to 0; otherwise both operands are
   * normalized to NFD and compared with the native strcmp().
   *
   * @param string $a
   * @param string $b
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcmp($a, $b)
  {
    if ($a . '' === $b . '') {
      return 0;
    }

    $left = Normalizer::normalize($a, Normalizer::NFD);
    $right = Normalizer::normalize($b, Normalizer::NFD);

    return strcmp($left, $right);
  }
2941
2942
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string
   *
   * @return   int The largest per-character byte length, 0 when
   *           self::chr_size_list() yields no entries.
   */
  public static function max_chr_width($str)
  {
    $widths = self::chr_size_list($str);

    if (count($widths) === 0) {
      return 0;
    }

    return (int)max($widths);
  }
2959
2960
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string
   *
   * @return   array An array of byte lengths of each character,
   *           empty for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // test for emptiness explicitly: a plain truthiness check would also
    // discard the valid one-character string "0"
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
2980
2981
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like &#123;
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity
   *
   * @return   string HTML numbered entity, '' for an empty string
   */
  public static function single_chr_html_encode($chr)
  {
    $chr = (string)$chr;

    // test for emptiness explicitly: a plain truthiness check would also
    // drop the character "0" instead of encoding it
    if (!isset($chr[0])) {
      return '';
    }

    return '&#' . self::ord($chr) . ';';
  }
2996
2997
  /**
   * Calculates Unicode Code Point of the given UTF-8 encoded character.
   *
   * Only the first (up to) four bytes are examined. When the lead byte
   * announces a multi-byte sequence but the required continuation bytes are
   * missing, the value of a shorter matching form - ultimately the lead byte
   * itself - is returned.
   *
   * @param    string $s The character of which to calculate Code Point
   *
   * @return   int Unicode Code Point of the given character,
   *           0 for an empty string
   */
  public static function ord($s)
  {
    $s = (string)$s;

    // test for emptiness explicitly: a plain truthiness check would also
    // map the character "0" to 0 instead of its code point 48
    if (!isset($s[0])) {
      return 0;
    }

    // 1-based list of the byte values of the first (up to) four bytes
    $s = unpack('C*', substr($s, 0, 4));
    $a = $s ? $s[1] : 0;

    // 4-byte sequence
    if (0xF0 <= $a && isset($s[4])) {
      return (($a - 0xF0) << 18) + (($s[2] - 0x80) << 12) + (($s[3] - 0x80) << 6) + $s[4] - 0x80;
    }

    // 3-byte sequence
    if (0xE0 <= $a && isset($s[3])) {
      return (($a - 0xE0) << 12) + (($s[2] - 0x80) << 6) + $s[3] - 0x80;
    }

    // 2-byte sequence
    if (0xC0 <= $a && isset($s[2])) {
      return (($a - 0xC0) << 6) + $s[2] - 0x80;
    }

    // single byte
    return $a;
  }
3028
3029
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: HTML Numbered Entities like &#123;&#39;&#1740;...
   *
   * Every character returned by self::split() is encoded with
   * single_chr_html_encode() and the results are concatenated.
   *
   * @param    string $str The Unicode string to be encoded as numbered entities
   *
   * @return   string HTML numbered entities
   */
  public static function html_encode($str)
  {
    $encoder = array(__CLASS__, 'single_chr_html_encode');
    $entities = array_map($encoder, self::split($str));

    return implode('', $entities);
  }
3050
3051
  /**
   * Checks if a file starts with BOM character.
   *
   * Only the first three bytes of the file are read.
   *
   * @param    string $file_path Path to a valid file
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *           (including when the file cannot be read)
   */
  public static function file_has_bom($file_path)
  {
    // Read from offset 0: a negative offset makes file_get_contents() seek
    // from the END of the file on PHP >= 7.1, so the former "-1" never
    // returned the leading bytes there.
    $head = file_get_contents($file_path, false, null, 0, 3);

    return self::is_bom($head);
  }
3062
3063
  /**
   * Checks if the given string is exactly "UTF8 - Byte Order Mark".
   *
   * WARNING: use "UTF8::string_has_bom()" if you will check BOM in a string
   *
   * @param    string $utf8_chr The input string
   *
   * @return   bool True if the $utf8_chr is Byte Order Mark, False otherwise
   */
  public static function is_bom($utf8_chr)
  {
    $mark = self::bom();

    return $mark === $utf8_chr;
  }
3076
3077
  /**
   * Returns the Byte Order Mark Character (the three bytes EF BB BF).
   *
   * @return   string Byte Order Mark
   */
  public static function bom()
  {
    $mark = "\xEF\xBB\xBF";

    return $mark;
  }
3086
3087
  /**
   * Alias for "UTF8::is_bom".
   *
   * @param string $utf8_chr
   *
   * @return boolean
   */
  public static function isBom($utf8_chr)
  {
    $isMark = self::is_bom($utf8_chr);

    return $isMark;
  }
3098
3099
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * @param    string $str The input string
   *
   * @return   bool True if the string has BOM at the start, False otherwise
   */
  public static function string_has_bom($str)
  {
    // the BOM is three bytes long, so only that prefix is compared
    $prefix = substr($str, 0, 3);

    return self::is_bom($prefix);
  }
3110
3111
  /**
   * makes sure the string starts with the UTF-8 Byte Order Mark
   *
   * INFO: a string that already starts with the BOM is returned untouched
   *
   * @param    string $str The input string
   *
   * @return   string The string, starting with the BOM
   */
  public static function add_bom_to_string($str)
  {
    $prefix = substr($str, 0, 3);

    // nothing to do when the first three bytes already are the BOM
    if (self::is_bom($prefix)) {
      return $str;
    }

    return self::bom() . $str;
  }
3128
3129
  /**
   * randomly reorders the characters of a string
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string
   */
  public static function str_shuffle($str)
  {
    // work on the pieces returned by split() instead of on single bytes
    $pieces = self::split($str);

    shuffle($pieces);

    return implode('', $pieces);
  }
3144
3145
  /**
   * Wraps a string to a given number of characters
   *
   * The native wordwrap() counts bytes. This method builds a single-byte
   * placeholder copy of the text, lets wordwrap() decide on that copy where
   * the breaks go and then applies these positions to the real characters.
   *
   * @param string $string The input string
   * @param int    $width  The number of characters at which the string is wrapped
   * @param string $break  The line delimiter
   * @param bool   $cut    Passed on to the native wordwrap() as its "cut" argument
   *
   * @return false|string Returns the given string wrapped at the specified length,
   *                      or false if the native wordwrap() rejects the arguments.
   */
  public static function wordwrap($string, $width = 75, $break = "\n", $cut = false)
  {
    // let the native function validate $width / $break / $cut first
    if (false === wordwrap('-', $width, $break, $cut)) {
      return false;
    }

    // the delimiter is compared strictly (===) further down, so make sure it
    // is a string (the former check was inverted and never cast anything)
    if (!is_string($break)) {
      $break = (string)$break;
    }

    // $w is the placeholder copy: '#' for every existing delimiter,
    // ' ' for a space and '?' for any other character
    $w = '';
    $string = explode($break, $string);
    $iLen = count($string);

    // $chars holds the real pieces in the same order as the placeholders in $w
    $chars = array();

    if (1 === $iLen && '' === $string[0]) {
      return '';
    }

    for ($i = 0; $i < $iLen; ++$i) {

      if ($i) {
        $chars[] = $break;
        $w .= '#';
      }

      $c = $string[$i];
      unset($string[$i]);

      foreach (self::split($c) as $c) {
        $chars[] = $c;
        $w .= ' ' === $c ? ' ' : '?';
      }
    }

    $string = '';
    $j = 0;
    $b = $i = -1;

    // wrap the placeholder copy, marking every break with '#'
    $w = wordwrap($w, $width, '#', $cut);

    // walk over all '#' positions and rebuild the text from the real pieces
    while (false !== $b = self::strpos($w, '#', $b + 1)) {
      // copy everything in front of the break position
      for (++$i; $i < $b; ++$i) {
        $string .= $chars[$j];
        unset($chars[$j++]);
      }

      // a delimiter or a space sitting at the break position is dropped,
      // the break written below takes its place
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
        unset($chars[$j++]);
      }

      $string .= $break;
    }

    // whatever is left belongs to the last line
    return $string . implode('', $chars);
  }
3210
3211
  /**
   * Find position of first occurrence of string in a string
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack     <p>
   *                              The string being checked.
   *                              </p>
   * @param string  $needle       <p>
   *                              The string to search for. The value is cast to string,
   *                              so an integer is searched by its decimal representation.
   *                              </p>
   * @param int     $offset       [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string  $encoding     The character encoding, a boolean is replaced by 'UTF-8'
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string
   *
   * @return int|false the numeric position of
   * the first occurrence of needle in the
   * haystack string. If
   * needle is not found (or haystack / needle is empty), it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    // INFO: $needle is always a string at this point, the former
    // "integer needle => code point" branch could never be entered

    if ($cleanUtf8 === true) {
      // mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    if (self::$support['mbstring'] === true) {
      return mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['iconv'] === true) {
      // use the iconv function that belongs to the checked extension
      // (grapheme_strpos() is part of "intl", not of "iconv")
      return iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // byte based fallback: cut off the skipped characters, search the bytes
    // and count the characters in front of the match
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    $pos = strpos($haystack, $needle);

    if ($pos !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
3287
3288
  /**
   * generates a UTF-8 encoded character from the given Code Point
   *
   * @param    int $code_point The code point for which to generate a character;
   *                           any non-integer value is handed to "UTF8::hex_to_int()"
   *
   * @return   string Multi-Byte character
   *           returns empty string on failure to encode
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $i = (int)$code_point;

    if ($i !== $code_point) {
      // not a plain integer: try to read it as a hexadecimal code point
      $i = (int)self::hex_to_int($code_point);

      if ($i === 0) {
        return '';
      }
    }

    // let the entity decoder build the character from its numbered entity
    return self::html_entity_decode("&#{$i};", ENT_QUOTES);
  }
3309
3310
  /**
   * converts hexadecimal U+xxxx code point representation to Integer
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * Accepted are 4 to 6 hexadecimal digits (case-insensitive), optionally
   * prefixed with "U+" or "\u".
   *
   * @param    string $str The Hexadecimal Code Point representation
   *
   * @return   int The Code Point, or 0 on failure
   */
  public static function hex_to_int($str)
  {
    // only real hexadecimal digits are allowed: with "a-z" a value like
    // "abcg" was accepted and silently truncated to 0xabc by intval()
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
3327
3328
  /**
   * reverses the order of the characters in a string
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $pieces = self::split($str);
    $reversed = array_reverse($pieces);

    return implode($reversed);
  }
3339
3340
  /**
   * returns the UTF-8 character with the maximum code point in the given data
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *
   * @return   string The character with the highest code point
   */
  public static function max($arg)
  {
    // an array of strings is treated as one long string
    $input = is_array($arg) ? implode($arg) : $arg;

    $highest = max(self::codepoints($input));

    return self::chr($highest);
  }
3355
3356
  /**
   * accepts a string and returns an array of Unicode Code Points
   *
   * @since 1.0
   *
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings
   * @param    bool  $u_style If True, will return Code Points in U+xxxx format,
   *                          default, Code Points will be returned as integers
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a string is split first, anything else is mapped as it is
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $to_ord = array(
        '\\voku\\helper\\UTF8',
        'ord',
    );
    $points = array_map($to_ord, $chars);

    if (!$u_style) {
      return $points;
    }

    // convert every code point with int_to_hex() and its default prefix
    $to_hex = array(
        '\\voku\\helper\\UTF8',
        'int_to_hex',
    );

    return array_map($to_hex, $points);
  }
3393
3394
  /**
   * returns the UTF-8 character with the minimum code point in the given data
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *
   * @return   string The character with the lowest code point
   */
  public static function min($arg)
  {
    // an array of strings is treated as one long string
    $input = is_array($arg) ? implode($arg) : $arg;

    $lowest = min(self::codepoints($input));

    return self::chr($lowest);
  }
3409
3410
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character
   * @param    string $pfix The prefix put in front of the hexadecimal digits
   *
   * @return   string The Code Point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $code_point = self::ord($chr);

    return self::int_to_hex($code_point, $pfix);
  }
3422
3423
  /**
   * Converts Integer to hexadecimal U+xxxx code point representation.
   *
   * @param    int    $int  The integer to be converted to hexadecimal code point
   * @param    string $pfix The prefix put in front of the hexadecimal digits
   *
   * @return   string The Code Point, or empty string on failure
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // only values made of decimal digits are accepted
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // left-pad with zeros to at least four hexadecimal digits
    $digits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
3443
3444
  /**
   * Get a binary representation of a string.
   *
   * Every byte of the input is passed to "UTF8::ord()" and the result is
   * written as an 8 digit binary number.
   *
   * @param   string $string The input string.
   *
   * @return  string The concatenated binary digits, empty string for empty input
   */
  public static function str_to_binary($string)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    $bits = '';
    $byte_count = strlen($string);

    for ($pos = 0; $pos < $byte_count; ++$pos) {
      $bits .= vsprintf('%08b', (array)self::ord($string[$pos]));
    }

    return $bits;
  }
3469
3470
  /**
   * counts number of words in the UTF-8 string
   *
   * @param string $s        The input string
   * @param int    $format   0: return the number of words,
   *                         1: return a list of the words,
   *                         2: return the words, keyed by their character offset in $s
   * @param string $charlist Handed to rxClass() together with '\pL' to build
   *                         the character class that makes up a word
   *
   * @return array|float|string The number of words in the string, or the words (see $format)
   */
  public static function str_word_count($s, $format = 0, $charlist = '')
  {
    $class = self::rxClass($charlist, '\pL');

    // split with captured delimiters: odd indexes hold the words,
    // even indexes hold the text between them
    $parts = preg_split("/({$class}+(?:[\p{Pd}’']{$class}+)*)/u", $s, -1, PREG_SPLIT_DELIM_CAPTURE);
    $count = count($parts);

    if (1 == $format) {
      $words = array();

      for ($i = 1; $i < $count; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if (2 == $format) {
      self::checkForSupport();

      $words = array();

      // the first word starts after the leading non-word text
      $offset = self::strlen($parts[0]);

      for ($i = 1; $i < $count; $i += 2) {
        $words[$offset] = $parts[$i];
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($count - 1) / 2;
  }
3504
3505
  /**
   * strip whitespace or other characters from beginning or end of a UTF-8 string
   *
   * INFO: this is slower then "trim()"
   *
   * But we can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) cost more time, then we can safe here.
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped; if omitted or empty,
   *                          separator (\pZ) and control (\pC) characters are stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    // "0" is falsy in PHP but a perfectly valid character to strip,
    // so it must not be mistaken for "no characters given"
    $use_default = ($chars === INF) || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($use_default) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $string);
    }

    return self::rtrim(self::ltrim($string, $chars), $chars);
  }
3533
3534
  /**
   * strip whitespace or other characters from end of a UTF-8 string
   *
   * WARNING: this is much slower then "rtrim()" !!!!
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped, whitespace (\s) if omitted
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    // build the character class that is removed from the end
    if (INF === $chars) {
      $class = '\s';
    } else {
      $class = self::rxClass($chars);
    }

    return preg_replace("/{$class}+$/u", '', $string);
  }
3556
3557
  /**
   * strip whitespace or other characters from beginning of a UTF-8 string
   *
   * WARNING: this is much slower then "ltrim()" !!!!
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped, whitespace (\s) if omitted
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    // build the character class that is removed from the beginning
    if (INF === $chars) {
      $class = '\s';
    } else {
      $class = self::rxClass($chars);
    }

    return preg_replace("/^{$class}+/u", '', $string);
  }
3579
3580
  /**
   * Replace text within a portion of a string
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * For a string input, $start and $length are counted in characters and
   * a missing $length replaces everything up to the end of the string.
   * For an array input, every element is processed on its own; a missing
   * $length is then handled as 0 for each element, i.e. the replacement
   * is inserted at $start.
   *
   * @param string|array   $string      The input string, or an array of input strings
   * @param string|array   $replacement The replacement; for a string input only the
   *                                    first element of an array is used
   * @param int|array      $start       Character position where the replacement starts
   * @param null|int|array $length      Number of characters to replace
   *
   * @return array|string The result string, or an array of results for an array input
   */
  public static function substr_replace($string, $replacement, $start, $length = null)
  {
    if (is_array($string)) {
      $num = count($string);

      // $replacement: one entry per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one integer per input string, non-integers become 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as $key => $value) {
          $start[$key] = is_int($value) ? $value : 0;
        }
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one entry per input string
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as $key => $value) {
          if (isset($value)) {
            $length[$key] = (is_int($value) ? $value : $num);
          } else {
            $length[$key] = 0;
          }
        }
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, element by element
      return array_map(array(__CLASS__, 'substr_replace'), $string, $replacement, $start, $length);
    }

    if (is_array($replacement)) {
      // only the first element counts, whatever its key is
      $replacement = count($replacement) > 0 ? reset($replacement) : '';
    }

    // split both strings into their characters
    preg_match_all('/./us', (string)$string, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // replace up to the end: the number of characters is already known,
      // no need to depend on mbstring and its internal encoding here
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
3656
3657
  /**
   * camelCase alias for "UTF8::to_latin1()"
   *
   * @param $text The value handed to "UTF8::to_latin1()"
   *
   * @return string Whatever "UTF8::to_latin1()" returns for $text
   */
  public static function toLatin1($text)
  {
    $latin1 = self::to_latin1($text);

    return $latin1;
  }
3668
3669
  /**
   * count the number of sub string occurrences
   *
   * @param    string   $haystack The string to search in
   * @param    string   $needle   The string to search for
   * @param    int      $offset   The offset (in characters) where to start counting
   * @param    int|null $length   The maximum length (in characters) after the specified
   *                              offset to search for the substring, null for "up to the end"
   *
   * @return   int number of occurrences of $needle
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $offset = (int)$offset;

    // Cut out the requested part character-wise first. The window must not be
    // handed to the native function again: it would be applied a second time,
    // and in bytes instead of characters.
    if ($offset !== 0 || $length !== null) {
      if ($length === null) {
        $haystack = self::substr($haystack, $offset);
      } else {
        $haystack = self::substr($haystack, $offset, (int)$length);
      }
    }

    // the cast covers a non-string result of substr()
    return substr_count((string)$haystack, $needle);
  }
3695
3696
  /**
   * camelCase alias for "UTF8::is_ascii()"
   *
   * @param string $str The string to check
   *
   * @return boolean Whatever "UTF8::is_ascii()" returns for $str
   */
  public static function isAscii($str)
  {
    $result = self::is_ascii($str);

    return $result;
  }
3707
3708
  /**
   * checks if a string is 7 bit ASCII
   *
   * @param    string $str The string to check
   *
   * @return   bool True if no byte in the range 0x80-0xFF was found, False otherwise
   */
  public static function is_ascii($str)
  {
    // look for any byte with the highest bit set
    $has_high_byte = preg_match('/[\x80-\xFF]/', $str);

    return !$has_high_byte;
  }
3719
3720
  /**
   * create an array containing a range of UTF-8 characters
   *
   * Each boundary is read as a decimal code point if it consists of digits only,
   * as a hexadecimal code point if "UTF8::hex_to_int()" accepts it, and is
   * handed to "UTF8::ord()" otherwise.
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at
   *
   * @return   array Array of UTF-8 characters, empty if a boundary can not be resolved
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $bounds = array();

    // both boundaries are resolved the same way, $var1 first
    foreach (array($var1, $var2) as $var) {
      if (ctype_digit((string)$var)) {
        $code_point = (int)$var;
      } else {
        $code_point = 0;

        if (is_string($var) && ctype_xdigit($var)) {
          $code_point = (int)self::hex_to_int($var);
        }

        // Not a usable hexadecimal code point (e.g. the single letter "a",
        // which is a hex digit but too short for hex_to_int()): use the value
        // as a character instead of giving up with an empty result.
        if (!$code_point) {
          $code_point = self::ord($var);
        }
      }

      if (!$code_point) {
        return array();
      }

      $bounds[] = $code_point;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3766
3767
  /**
   * creates a random string
   *
   * The alphabet is built once per request and cached in a static variable.
   * The characters are picked with mt_rand(), so the result is not suitable
   * for anything security related.
   *
   * @param    int $len The length of string in characters
   *
   * @return   string String consisting of random characters, empty if $len is not positive
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // a fractional length would never count down to exactly 0
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (!$chars) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // blank out everything that is neither a number nor a letter
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // drop the blanked entries only: array_filter() without a callback
        // would also throw away the (falsy) digit "0"
        $chars = array_values(array_filter($chars, 'strlen'));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for ($i = 0; $i < $len; ++$i) {
      $hash .= $chars[mt_rand(0, $chars_len - 1)];
    }

    return $hash;
  }
3814
3815
  /**
   * callback( )
   *
   * @alias of UTF8::chr_map( )
   *
   * @param $callback The callback handed to "UTF8::chr_map()"
   * @param $str      The string handed to "UTF8::chr_map()"
   *
   * @return array Whatever "UTF8::chr_map()" returns
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
3829
3830
  /**
   * applies callback to all characters of a string
   *
   * @param    string $callback The callback function
   * @param    string $str      UTF-8 string to run callback on
   *
   * @return   array The outcome of callback, one entry per piece returned by split()
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
3845
3846
  /**
   * returns a single UTF-8 character from string.
   *
   * @param    string $string UTF-8 string
   * @param    int    $pos    The position of character to return.
   *
   * @return   string Single Multi-Byte character
   */
  public static function access($string, $pos)
  {
    // $str[1] like functionality: a substring of length one at $pos
    $length = 1;

    return self::substr($string, $pos, $length);
  }
3860
3861
  /**
   * sort all characters according to code points
   *
   * @param    string $str    UTF-8 string
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $points = self::codepoints($str);

    if ($unique) {
      // flipping twice leaves one entry per code point
      $flipped = array_flip($points);
      $points = array_flip($flipped);
    }

    if (!$desc) {
      asort($points);
    } else {
      arsort($points);
    }

    // turn the ordered code points back into a string
    return self::string($points);
  }
3886
3887
  /**
   * makes a UTF-8 string from code points
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $to_chr = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    // convert every code point with chr() and join the characters
    $chars = array_map($to_chr, $array);

    return implode($chars);
  }
3906
3907
  /**
   * Strip HTML and PHP tags from a string
   *
   * The string is passed through "UTF8::clean()" before the native
   * strip_tags() is applied.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               You can use the optional second parameter to specify tags which should
   *                               not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 first
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
3933
3934
  /**
   * pad a UTF-8 string to given length with another string
   *
   * The input is returned unchanged if $pad_length is not a positive integer,
   * if it is smaller than the length of the input, or if $pad_string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $input_length) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad string can not fill anything and would cause a division by zero
    if ($ps_length < 1) {
      return $input;
    }

    $diff = $pad_length - $input_length;

    // number of padding characters on each side
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $left = $diff;
        $right = 0;
        break;

      case STR_PAD_BOTH:
        // the right side gets the extra character of an odd difference;
        // divide first, convert to int afterwards
        $left = (int)floor($diff / 2);
        $right = $diff - $left;
        break;

      case STR_PAD_RIGHT:
      default:
        $left = 0;
        $right = $diff;
    }

    $pre = '';
    if ($left > 0) {
      // repeat the pad string often enough, then cut to the exact length
      $pre = str_repeat($pad_string, (int)ceil($left / $ps_length));
      $pre = self::substr($pre, 0, $left);
    }

    $post = '';
    if ($right > 0) {
      $post = str_repeat($pad_string, (int)ceil($right / $ps_length));
      $post = self::substr($post, 0, $right);
    }

    return $pre . $input . $post;
  }
3979
3980
  /**
3981
   * Repeat a string
3982
   *
3983
   * @param string $input      <p>
3984
   *                           The string to be repeated.
3985
   *                           </p>
3986
   * @param int    $multiplier <p>
3987
   *                           Number of times the input string should be
3988
   *                           repeated.
3989
   *                           </p>
3990
   *                           <p>
3991
   *                           multiplier has to be greater than or equal to 0.
3992
   *                           If the multiplier is set to 0, the function
3993
   *                           will return an empty string.
3994
   *                           </p>
3995
   *
3996
   * @return string the repeated string.
3997
   */
3998 1
  public static function str_repeat($input, $multiplier)
  {
    // Pass the input through self::filter() and hand the result to the
    // native str_repeat().
    return str_repeat(self::filter($input), $multiplier);
  }
4004
4005
  /**
4006
   * removes duplicate occurrences of a string in another string
4007
   *
4008
   * @param    string       $str  The base string
4009
   * @param    string|array $what String to search for in the base string
4010
   *
4011
   * @return   string The result string with removed duplicates
4012
   */
4013 1
  public static function remove_duplicates($str, $what = ' ')
  {
    // a single search string is handled like a one-element list
    if (is_string($what)) {
      $what = array($what);
    }

    // anything that is neither a string nor an array: nothing to collapse
    if (!is_array($what)) {
      return $str;
    }

    foreach ($what as $item) {
      // Collapse every run of one or more consecutive occurrences of $item
      // into a single occurrence. A callback is used for the replacement so
      // that "$0", "$1", "\0" ... inside $item are inserted literally and
      // not expanded as back-references.
      $str = preg_replace_callback(
          '/(' . preg_quote($item, '/') . ')+/',
          function () use ($item) {
            return $item;
          },
          $str
      );
    }

    return $str;
  }
4027
4028
  /**
4029
   * Finds position of first occurrence of a string within another, case insensitive
4030
   *
4031
   * @link http://php.net/manual/en/function.mb-stripos.php
4032
   *
4033
   * @param string  $haystack  <p>
4034
   *                           The string from which to get the position of the first occurrence
4035
   *                           of needle
4036
   *                           </p>
4037
   * @param string  $needle    <p>
4038
   *                           The string to find in haystack
4039
   *                           </p>
4040
   * @param int     $offset    [optional] <p>
4041
   *                           The position in haystack
4042
   *                           to start searching
4043
   *                           </p>
4044
   * @param string  $encoding
4045
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
4046
   *
4047
   * @return int|false Return the numeric position of the first occurrence of
4048
   * needle in the haystack
4049
   * string, or false if needle is not found.
4050
   */
4051 8
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle can never produce a match
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    // (a boolean in the $encoding position is replaced by the default)
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    // mb_stripos() expects an integer offset; the default of this method is
    // null, so cast it (null => 0) instead of passing null through.
    return mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4075
4076
  /**
4077
   * fix a double (or multiple) encoded UTF8 string
4078
   *
4079
   * @param array|string $text
4080
   *
4081
   * @return array|string
4082
   */
4083 1
  public static function fix_utf8($text)
  {
    // Arrays are handled element by element (recursively); keys are kept.
    if (is_array($text)) {
      foreach (array_keys($text) as $key) {
        $text[$key] = self::fix_utf8($text[$key]);
      }

      return $text;
    }

    // Repeat the utf8_decode() / to_utf8() round trip until it no longer
    // changes the value (loose comparison, as before).
    $previous = '';
    while ($previous != $text) {
      $previous = $text;
      $text = self::to_utf8(self::utf8_decode($previous));
    }

    return $text;
  }
4103
4104
  /**
4105
   * alias for "UTF8::ucfirst"
4106
   *
4107
   * @param string $str
4108
   *
4109
   * @return string
4110
   */
4111
  public static function ucword($str)
  {
    // pure alias: delegate to ucfirst() and pass its result through
    $result = self::ucfirst($str);

    return $result;
  }
4115
4116
  /**
4117
   * makes string's first char uppercase
4118
   *
4119
   * @param    string $str The input string
4120
   *
4121
   * @return   string The resulting string
4122
   */
4123 13
  public static function ucfirst($str)
  {
    // upper-case only the first character ...
    $head = self::strtoupper(self::substr($str, 0, 1));

    // ... and re-attach everything from the second character on, untouched
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
4127
4128
  /**
4129
   * make a string uppercase
4130
   *
4131
   * @link http://php.net/manual/en/function.mb-strtoupper.php
4132
   *
4133
   * @param string $str <p>
4134
   *                    The string being uppercased.
4135
   *                    </p>
4136
   * @param string $encoding
4137
   *
4138
   * @return string str with all alphabetic characters converted to uppercase.
4139
   */
4140 14
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // lookup lists for the fallback path, filled on first use and then
    // kept in static locals for later calls
    static $caseTableKeys = null;
    static $caseTableValues = null;

    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    // preferred path: let mbstring do the work
    if (self::$support['mbstring'] === true) {
      return mb_strtoupper($str, $encoding);
    }

    // fallback: replace every key of the case table by its value
    if ($caseTableKeys === null) {
      $table = self::case_table();
      $caseTableKeys = array_keys($table);
      $caseTableValues = array_values($table);
    }

    return str_replace($caseTableKeys, $caseTableValues, self::clean($str));
  }
4171
4172
  /**
4173
   * returns an array of all lower and upper case UTF-8 encoded characters
4174
   *
4175
   * @return   array An array with lower case chars as keys and upper chars as values
4176
   */
4177
  protected static function case_table()
4178
  {
4179
    static $case = array(
4180
4181
      //lower => upper
4182
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
4183
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
4184
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
4185
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
4186
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
4187
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
4188
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
4189
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
4190
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
4191
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
4192
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
4193
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
4194
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
4195
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
4196
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
4197
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
4198
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
4199
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
4200
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
4201
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
4202
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
4203
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
4204
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
4205
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
4206
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
4207
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
4208
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
4209
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
4210
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
4211
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
4212
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
4213
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
4214
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
4215
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
4216
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
4217
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
4218
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
4219
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
4220
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
4221
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
4222
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
4223
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
4224
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
4225
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
4226
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
4227
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
4228
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
4229
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
4230
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
4231
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
4232
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
4233
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
4234
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
4235
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
4236
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
4237
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
4238
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
4239
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
4240
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
4241
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
4242
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
4243
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
4244
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
4245
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
4246
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
4247
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
4248
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
4249
      "\xea\x9e\x87"     => "\xea\x9e\x86",
4250
      "\xea\x9e\x85"     => "\xea\x9e\x84",
4251
      "\xea\x9e\x83"     => "\xea\x9e\x82",
4252
      "\xea\x9e\x81"     => "\xea\x9e\x80",
4253
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
4254
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
4255
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
4256
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
4257
      "\xea\x9d\xad"     => "\xea\x9d\xac",
4258
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
4259
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
4260
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
4261
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
4262
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
4263
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
4264
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
4265
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
4266
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
4267
      "\xea\x9d\x99"     => "\xea\x9d\x98",
4268
      "\xea\x9d\x97"     => "\xea\x9d\x96",
4269
      "\xea\x9d\x95"     => "\xea\x9d\x94",
4270
      "\xea\x9d\x93"     => "\xea\x9d\x92",
4271
      "\xea\x9d\x91"     => "\xea\x9d\x90",
4272
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
4273
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
4274
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
4275
      "\xea\x9d\x89"     => "\xea\x9d\x88",
4276
      "\xea\x9d\x87"     => "\xea\x9d\x86",
4277
      "\xea\x9d\x85"     => "\xea\x9d\x84",
4278
      "\xea\x9d\x83"     => "\xea\x9d\x82",
4279
      "\xea\x9d\x81"     => "\xea\x9d\x80",
4280
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
4281
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
4282
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
4283
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
4284
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
4285
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
4286
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
4287
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
4288
      "\xea\x9c\xad"     => "\xea\x9c\xac",
4289
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
4290
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
4291
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
4292
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
4293
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
4294
      "\xea\x9a\x97"     => "\xea\x9a\x96",
4295
      "\xea\x9a\x95"     => "\xea\x9a\x94",
4296
      "\xea\x9a\x93"     => "\xea\x9a\x92",
4297
      "\xea\x9a\x91"     => "\xea\x9a\x90",
4298
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
4299
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
4300
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
4301
      "\xea\x9a\x89"     => "\xea\x9a\x88",
4302
      "\xea\x9a\x87"     => "\xea\x9a\x86",
4303
      "\xea\x9a\x85"     => "\xea\x9a\x84",
4304
      "\xea\x9a\x83"     => "\xea\x9a\x82",
4305
      "\xea\x9a\x81"     => "\xea\x9a\x80",
4306
      "\xea\x99\xad"     => "\xea\x99\xac",
4307
      "\xea\x99\xab"     => "\xea\x99\xaa",
4308
      "\xea\x99\xa9"     => "\xea\x99\xa8",
4309
      "\xea\x99\xa7"     => "\xea\x99\xa6",
4310
      "\xea\x99\xa5"     => "\xea\x99\xa4",
4311
      "\xea\x99\xa3"     => "\xea\x99\xa2",
4312
      "\xea\x99\x9f"     => "\xea\x99\x9e",
4313
      "\xea\x99\x9d"     => "\xea\x99\x9c",
4314
      "\xea\x99\x9b"     => "\xea\x99\x9a",
4315
      "\xea\x99\x99"     => "\xea\x99\x98",
4316
      "\xea\x99\x97"     => "\xea\x99\x96",
4317
      "\xea\x99\x95"     => "\xea\x99\x94",
4318
      "\xea\x99\x93"     => "\xea\x99\x92",
4319
      "\xea\x99\x91"     => "\xea\x99\x90",
4320
      "\xea\x99\x8f"     => "\xea\x99\x8e",
4321
      "\xea\x99\x8d"     => "\xea\x99\x8c",
4322
      "\xea\x99\x8b"     => "\xea\x99\x8a",
4323
      "\xea\x99\x89"     => "\xea\x99\x88",
4324
      "\xea\x99\x87"     => "\xea\x99\x86",
4325
      "\xea\x99\x85"     => "\xea\x99\x84",
4326
      "\xea\x99\x83"     => "\xea\x99\x82",
4327
      "\xea\x99\x81"     => "\xea\x99\x80",
4328
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
4329
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
4330
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
4331
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
4332
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
4333
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
4334
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
4335
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
4336
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
4337
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
4338
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
4339
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
4340
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
4341
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
4342
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
4343
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
4344
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
4345
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
4346
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
4347
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
4348
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
4349
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
4350
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
4351
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
4352
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
4353
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
4354
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
4355
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
4356
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
4357
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
4358
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
4359
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
4360
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
4361
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
4362
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
4363
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
4364
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
4365
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
4366
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
4367
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
4368
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
4369
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
4370
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
4371
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
4372
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
4373
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
4374
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
4375
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
4376
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
4377
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
4378
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
4379
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
4380
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
4381
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
4382
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
4383
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
4384
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
4385
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
4386
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
4387
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
4388
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
4389
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
4390
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
4391
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
4392
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
4393
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
4394
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
4395
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
4396
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
4397
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
4398
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
4399
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
4400
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
4401
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
4402
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
4403
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
4404
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
4405
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
4406
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
4407
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
4408
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
4409
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
4410
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
4411
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
4412
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
4413
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
4414
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
4415
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
4416
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
4417
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
4418
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
4419
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
4420
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
4421
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
4422
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
4423
      "\xe2\xb1\xa6"     => "\xc8\xbe",
4424
      "\xe2\xb1\xa5"     => "\xc8\xba",
4425
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
4426
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
4427
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
4428
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
4429
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
4430
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
4431
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
4432
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
4433
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
4434
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
4435
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
4436
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
4437
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
4438
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
4439
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
4440
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
4441
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
4442
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
4443
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
4444
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
4445
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
4446
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
4447
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
4448
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
4449
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
4450
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
4451
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
4452
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
4453
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
4454
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
4455
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
4456
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
4457
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
4458
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
4459
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
4460
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
4461
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
4462
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
4463
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
4464
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
4465
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
4466
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
4467
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
4468
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
4469
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
4470
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
4471
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
4472
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
4473
      "\xe2\x86\x84"     => "\xe2\x86\x83",
4474
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
4475
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
4476
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
4477
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
4478
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
4479
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
4480
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
4481
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
4482
      "\xe1\xbe\xbe"     => "\xce\x99",
4483
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
4484
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
4485
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
4486
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
4487
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
4488
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
4489
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
4490
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
4491
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
4492
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
4493
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
4494
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
4495
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
4496
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
4497
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
4498
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
4499
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
4500
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
4501
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
4502
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
4503
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
4504
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
4505
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
4506
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
4507
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
4508
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
4509
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
4510
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
4511
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
4512
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
4513
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
4514
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
4515
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
4516
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
4517
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
4518
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
4519
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
4520
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
4521
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
4522
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
4523
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
4524
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
4525
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
4526
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
4527
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
4528
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
4529
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
4530
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
4531
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
4532
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
4533
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
4534
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
4535
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
4536
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
4537
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
4538
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
4539
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
4540
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
4541
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
4542
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
4543
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
4544
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
4545
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
4546
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
4547
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
4548
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
4549
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
4550
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
4551
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
4552
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
4553
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
4554
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
4555
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
4556
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
4557
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
4558
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
4559
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
4560
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
4561
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
4562
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
4563
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
4564
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
4565
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
4566
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
4567
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
4568
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
4569
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
4570
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
4571
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
4572
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
4573
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
4574
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
4575
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
4576
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
4577
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
4578
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
4579
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
4580
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
4581
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
4582
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
4583
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
4584
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
4585
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
4586
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
4587
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
4588
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
4589
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
4590
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
4591
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
4592
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
4593
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
4594
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
4595
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
4596
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
4597
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
4598
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
4599
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
4600
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
4601
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
4602
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
4603
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
4604
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
4605
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
4606
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
4607
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
4608
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
4609
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
4610
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
4611
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
4612
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
4613
      "\xe1\xba\xad"     => "\xe1\xba\xac",
4614
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
4615
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
4616
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
4617
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
4618
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
4619
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
4620
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
4621
      "\xe1\xba\x95"     => "\xe1\xba\x94",
4622
      "\xe1\xba\x93"     => "\xe1\xba\x92",
4623
      "\xe1\xba\x91"     => "\xe1\xba\x90",
4624
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
4625
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
4626
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
4627
      "\xe1\xba\x89"     => "\xe1\xba\x88",
4628
      "\xe1\xba\x87"     => "\xe1\xba\x86",
4629
      "\xe1\xba\x85"     => "\xe1\xba\x84",
4630
      "\xe1\xba\x83"     => "\xe1\xba\x82",
4631
      "\xe1\xba\x81"     => "\xe1\xba\x80",
4632
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
4633
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
4634
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
4635
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
4636
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
4637
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
4638
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
4639
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
4640
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
4641
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
4642
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
4643
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
4644
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
4645
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
4646
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
4647
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
4648
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
4649
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
4650
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
4651
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
4652
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
4653
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
4654
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
4655
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
4656
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
4657
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
4658
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
4659
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
4660
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
4661
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
4662
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
4663
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
4664
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
4665
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
4666
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
4667
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
4668
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
4669
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
4670
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
4671
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
4672
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
4673
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
4674
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
4675
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
4676
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
4677
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
4678
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
4679
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
4680
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
4681
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
4682
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
4683
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
4684
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
4685
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
4686
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
4687
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
4688
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
4689
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
4690
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
4691
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
4692
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
4693
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
4694
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
4695
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
4696
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
4697
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
4698
      "\xd6\x86"         => "\xd5\x96",
4699
      "\xd6\x85"         => "\xd5\x95",
4700
      "\xd6\x84"         => "\xd5\x94",
4701
      "\xd6\x83"         => "\xd5\x93",
4702
      "\xd6\x82"         => "\xd5\x92",
4703
      "\xd6\x81"         => "\xd5\x91",
4704
      "\xd6\x80"         => "\xd5\x90",
4705
      "\xd5\xbf"         => "\xd5\x8f",
4706
      "\xd5\xbe"         => "\xd5\x8e",
4707
      "\xd5\xbd"         => "\xd5\x8d",
4708
      "\xd5\xbc"         => "\xd5\x8c",
4709
      "\xd5\xbb"         => "\xd5\x8b",
4710
      "\xd5\xba"         => "\xd5\x8a",
4711
      "\xd5\xb9"         => "\xd5\x89",
4712
      "\xd5\xb8"         => "\xd5\x88",
4713
      "\xd5\xb7"         => "\xd5\x87",
4714
      "\xd5\xb6"         => "\xd5\x86",
4715
      "\xd5\xb5"         => "\xd5\x85",
4716
      "\xd5\xb4"         => "\xd5\x84",
4717
      "\xd5\xb3"         => "\xd5\x83",
4718
      "\xd5\xb2"         => "\xd5\x82",
4719
      "\xd5\xb1"         => "\xd5\x81",
4720
      "\xd5\xb0"         => "\xd5\x80",
4721
      "\xd5\xaf"         => "\xd4\xbf",
4722
      "\xd5\xae"         => "\xd4\xbe",
4723
      "\xd5\xad"         => "\xd4\xbd",
4724
      "\xd5\xac"         => "\xd4\xbc",
4725
      "\xd5\xab"         => "\xd4\xbb",
4726
      "\xd5\xaa"         => "\xd4\xba",
4727
      "\xd5\xa9"         => "\xd4\xb9",
4728
      "\xd5\xa8"         => "\xd4\xb8",
4729
      "\xd5\xa7"         => "\xd4\xb7",
4730
      "\xd5\xa6"         => "\xd4\xb6",
4731
      "\xd5\xa5"         => "\xd4\xb5",
4732
      "\xd5\xa4"         => "\xd4\xb4",
4733
      "\xd5\xa3"         => "\xd4\xb3",
4734
      "\xd5\xa2"         => "\xd4\xb2",
4735
      "\xd5\xa1"         => "\xd4\xb1",
4736
      "\xd4\xa5"         => "\xd4\xa4",
4737
      "\xd4\xa3"         => "\xd4\xa2",
4738
      "\xd4\xa1"         => "\xd4\xa0",
4739
      "\xd4\x9f"         => "\xd4\x9e",
4740
      "\xd4\x9d"         => "\xd4\x9c",
4741
      "\xd4\x9b"         => "\xd4\x9a",
4742
      "\xd4\x99"         => "\xd4\x98",
4743
      "\xd4\x97"         => "\xd4\x96",
4744
      "\xd4\x95"         => "\xd4\x94",
4745
      "\xd4\x93"         => "\xd4\x92",
4746
      "\xd4\x91"         => "\xd4\x90",
4747
      "\xd4\x8f"         => "\xd4\x8e",
4748
      "\xd4\x8d"         => "\xd4\x8c",
4749
      "\xd4\x8b"         => "\xd4\x8a",
4750
      "\xd4\x89"         => "\xd4\x88",
4751
      "\xd4\x87"         => "\xd4\x86",
4752
      "\xd4\x85"         => "\xd4\x84",
4753
      "\xd4\x83"         => "\xd4\x82",
4754
      "\xd4\x81"         => "\xd4\x80",
4755
      "\xd3\xbf"         => "\xd3\xbe",
4756
      "\xd3\xbd"         => "\xd3\xbc",
4757
      "\xd3\xbb"         => "\xd3\xba",
4758
      "\xd3\xb9"         => "\xd3\xb8",
4759
      "\xd3\xb7"         => "\xd3\xb6",
4760
      "\xd3\xb5"         => "\xd3\xb4",
4761
      "\xd3\xb3"         => "\xd3\xb2",
4762
      "\xd3\xb1"         => "\xd3\xb0",
4763
      "\xd3\xaf"         => "\xd3\xae",
4764
      "\xd3\xad"         => "\xd3\xac",
4765
      "\xd3\xab"         => "\xd3\xaa",
4766
      "\xd3\xa9"         => "\xd3\xa8",
4767
      "\xd3\xa7"         => "\xd3\xa6",
4768
      "\xd3\xa5"         => "\xd3\xa4",
4769
      "\xd3\xa3"         => "\xd3\xa2",
4770
      "\xd3\xa1"         => "\xd3\xa0",
4771
      "\xd3\x9f"         => "\xd3\x9e",
4772
      "\xd3\x9d"         => "\xd3\x9c",
4773
      "\xd3\x9b"         => "\xd3\x9a",
4774
      "\xd3\x99"         => "\xd3\x98",
4775
      "\xd3\x97"         => "\xd3\x96",
4776
      "\xd3\x95"         => "\xd3\x94",
4777
      "\xd3\x93"         => "\xd3\x92",
4778
      "\xd3\x91"         => "\xd3\x90",
4779
      "\xd3\x8f"         => "\xd3\x80",
4780
      "\xd3\x8e"         => "\xd3\x8d",
4781
      "\xd3\x8c"         => "\xd3\x8b",
4782
      "\xd3\x8a"         => "\xd3\x89",
4783
      "\xd3\x88"         => "\xd3\x87",
4784
      "\xd3\x86"         => "\xd3\x85",
4785
      "\xd3\x84"         => "\xd3\x83",
4786
      "\xd3\x82"         => "\xd3\x81",
4787
      "\xd2\xbf"         => "\xd2\xbe",
4788
      "\xd2\xbd"         => "\xd2\xbc",
4789
      "\xd2\xbb"         => "\xd2\xba",
4790
      "\xd2\xb9"         => "\xd2\xb8",
4791
      "\xd2\xb7"         => "\xd2\xb6",
4792
      "\xd2\xb5"         => "\xd2\xb4",
4793
      "\xd2\xb3"         => "\xd2\xb2",
4794
      "\xd2\xb1"         => "\xd2\xb0",
4795
      "\xd2\xaf"         => "\xd2\xae",
4796
      "\xd2\xad"         => "\xd2\xac",
4797
      "\xd2\xab"         => "\xd2\xaa",
4798
      "\xd2\xa9"         => "\xd2\xa8",
4799
      "\xd2\xa7"         => "\xd2\xa6",
4800
      "\xd2\xa5"         => "\xd2\xa4",
4801
      "\xd2\xa3"         => "\xd2\xa2",
4802
      "\xd2\xa1"         => "\xd2\xa0",
4803
      "\xd2\x9f"         => "\xd2\x9e",
4804
      "\xd2\x9d"         => "\xd2\x9c",
4805
      "\xd2\x9b"         => "\xd2\x9a",
4806
      "\xd2\x99"         => "\xd2\x98",
4807
      "\xd2\x97"         => "\xd2\x96",
4808
      "\xd2\x95"         => "\xd2\x94",
4809
      "\xd2\x93"         => "\xd2\x92",
4810
      "\xd2\x91"         => "\xd2\x90",
4811
      "\xd2\x8f"         => "\xd2\x8e",
4812
      "\xd2\x8d"         => "\xd2\x8c",
4813
      "\xd2\x8b"         => "\xd2\x8a",
4814
      "\xd2\x81"         => "\xd2\x80",
4815
      "\xd1\xbf"         => "\xd1\xbe",
4816
      "\xd1\xbd"         => "\xd1\xbc",
4817
      "\xd1\xbb"         => "\xd1\xba",
4818
      "\xd1\xb9"         => "\xd1\xb8",
4819
      "\xd1\xb7"         => "\xd1\xb6",
4820
      "\xd1\xb5"         => "\xd1\xb4",
4821
      "\xd1\xb3"         => "\xd1\xb2",
4822
      "\xd1\xb1"         => "\xd1\xb0",
4823
      "\xd1\xaf"         => "\xd1\xae",
4824
      "\xd1\xad"         => "\xd1\xac",
4825
      "\xd1\xab"         => "\xd1\xaa",
4826
      "\xd1\xa9"         => "\xd1\xa8",
4827
      "\xd1\xa7"         => "\xd1\xa6",
4828
      "\xd1\xa5"         => "\xd1\xa4",
4829
      "\xd1\xa3"         => "\xd1\xa2",
4830
      "\xd1\xa1"         => "\xd1\xa0",
4831
      "\xd1\x9f"         => "\xd0\x8f",
4832
      "\xd1\x9e"         => "\xd0\x8e",
4833
      "\xd1\x9d"         => "\xd0\x8d",
4834
      "\xd1\x9c"         => "\xd0\x8c",
4835
      "\xd1\x9b"         => "\xd0\x8b",
4836
      "\xd1\x9a"         => "\xd0\x8a",
4837
      "\xd1\x99"         => "\xd0\x89",
4838
      "\xd1\x98"         => "\xd0\x88",
4839
      "\xd1\x97"         => "\xd0\x87",
4840
      "\xd1\x96"         => "\xd0\x86",
4841
      "\xd1\x95"         => "\xd0\x85",
4842
      "\xd1\x94"         => "\xd0\x84",
4843
      "\xd1\x93"         => "\xd0\x83",
4844
      "\xd1\x92"         => "\xd0\x82",
4845
      "\xd1\x91"         => "\xd0\x81",
4846
      "\xd1\x90"         => "\xd0\x80",
4847
      "\xd1\x8f"         => "\xd0\xaf",
4848
      "\xd1\x8e"         => "\xd0\xae",
4849
      "\xd1\x8d"         => "\xd0\xad",
4850
      "\xd1\x8c"         => "\xd0\xac",
4851
      "\xd1\x8b"         => "\xd0\xab",
4852
      "\xd1\x8a"         => "\xd0\xaa",
4853
      "\xd1\x89"         => "\xd0\xa9",
4854
      "\xd1\x88"         => "\xd0\xa8",
4855
      "\xd1\x87"         => "\xd0\xa7",
4856
      "\xd1\x86"         => "\xd0\xa6",
4857
      "\xd1\x85"         => "\xd0\xa5",
4858
      "\xd1\x84"         => "\xd0\xa4",
4859
      "\xd1\x83"         => "\xd0\xa3",
4860
      "\xd1\x82"         => "\xd0\xa2",
4861
      "\xd1\x81"         => "\xd0\xa1",
4862
      "\xd1\x80"         => "\xd0\xa0",
4863
      "\xd0\xbf"         => "\xd0\x9f",
4864
      "\xd0\xbe"         => "\xd0\x9e",
4865
      "\xd0\xbd"         => "\xd0\x9d",
4866
      "\xd0\xbc"         => "\xd0\x9c",
4867
      "\xd0\xbb"         => "\xd0\x9b",
4868
      "\xd0\xba"         => "\xd0\x9a",
4869
      "\xd0\xb9"         => "\xd0\x99",
4870
      "\xd0\xb8"         => "\xd0\x98",
4871
      "\xd0\xb7"         => "\xd0\x97",
4872
      "\xd0\xb6"         => "\xd0\x96",
4873
      "\xd0\xb5"         => "\xd0\x95",
4874
      "\xd0\xb4"         => "\xd0\x94",
4875
      "\xd0\xb3"         => "\xd0\x93",
4876
      "\xd0\xb2"         => "\xd0\x92",
4877
      "\xd0\xb1"         => "\xd0\x91",
4878
      "\xd0\xb0"         => "\xd0\x90",
4879
      "\xcf\xbb"         => "\xcf\xba",
4880
      "\xcf\xb8"         => "\xcf\xb7",
4881
      "\xcf\xb5"         => "\xce\x95",
4882
      "\xcf\xb2"         => "\xcf\xb9",
4883
      "\xcf\xb1"         => "\xce\xa1",
4884
      "\xcf\xb0"         => "\xce\x9a",
4885
      "\xcf\xaf"         => "\xcf\xae",
4886
      "\xcf\xad"         => "\xcf\xac",
4887
      "\xcf\xab"         => "\xcf\xaa",
4888
      "\xcf\xa9"         => "\xcf\xa8",
4889
      "\xcf\xa7"         => "\xcf\xa6",
4890
      "\xcf\xa5"         => "\xcf\xa4",
4891
      "\xcf\xa3"         => "\xcf\xa2",
4892
      "\xcf\xa1"         => "\xcf\xa0",
4893
      "\xcf\x9f"         => "\xcf\x9e",
4894
      "\xcf\x9d"         => "\xcf\x9c",
4895
      "\xcf\x9b"         => "\xcf\x9a",
4896
      "\xcf\x99"         => "\xcf\x98",
4897
      "\xcf\x97"         => "\xcf\x8f",
4898
      "\xcf\x96"         => "\xce\xa0",
4899
      "\xcf\x95"         => "\xce\xa6",
4900
      "\xcf\x91"         => "\xce\x98",
4901
      "\xcf\x90"         => "\xce\x92",
4902
      "\xcf\x8e"         => "\xce\x8f",
4903
      "\xcf\x8d"         => "\xce\x8e",
4904
      "\xcf\x8c"         => "\xce\x8c",
4905
      "\xcf\x8b"         => "\xce\xab",
4906
      "\xcf\x8a"         => "\xce\xaa",
4907
      "\xcf\x89"         => "\xce\xa9",
4908
      "\xcf\x88"         => "\xce\xa8",
4909
      "\xcf\x87"         => "\xce\xa7",
4910
      "\xcf\x86"         => "\xce\xa6",
4911
      "\xcf\x85"         => "\xce\xa5",
4912
      "\xcf\x84"         => "\xce\xa4",
4913
      "\xcf\x83"         => "\xce\xa3",
4914
      "\xcf\x82"         => "\xce\xa3",
4915
      "\xcf\x81"         => "\xce\xa1",
4916
      "\xcf\x80"         => "\xce\xa0",
4917
      "\xce\xbf"         => "\xce\x9f",
4918
      "\xce\xbe"         => "\xce\x9e",
4919
      "\xce\xbd"         => "\xce\x9d",
4920
      "\xce\xbc"         => "\xce\x9c",
4921
      "\xce\xbb"         => "\xce\x9b",
4922
      "\xce\xba"         => "\xce\x9a",
4923
      "\xce\xb9"         => "\xce\x99",
4924
      "\xce\xb8"         => "\xce\x98",
4925
      "\xce\xb7"         => "\xce\x97",
4926
      "\xce\xb6"         => "\xce\x96",
4927
      "\xce\xb5"         => "\xce\x95",
4928
      "\xce\xb4"         => "\xce\x94",
4929
      "\xce\xb3"         => "\xce\x93",
4930
      "\xce\xb2"         => "\xce\x92",
4931
      "\xce\xb1"         => "\xce\x91",
4932
      "\xce\xaf"         => "\xce\x8a",
4933
      "\xce\xae"         => "\xce\x89",
4934
      "\xce\xad"         => "\xce\x88",
4935
      "\xce\xac"         => "\xce\x86",
4936
      "\xcd\xbd"         => "\xcf\xbf",
4937
      "\xcd\xbc"         => "\xcf\xbe",
4938
      "\xcd\xbb"         => "\xcf\xbd",
4939
      "\xcd\xb7"         => "\xcd\xb6",
4940
      "\xcd\xb3"         => "\xcd\xb2",
4941
      "\xcd\xb1"         => "\xcd\xb0",
4942
      "\xca\x92"         => "\xc6\xb7",
4943
      "\xca\x8c"         => "\xc9\x85",
4944
      "\xca\x8b"         => "\xc6\xb2",
4945
      "\xca\x8a"         => "\xc6\xb1",
4946
      "\xca\x89"         => "\xc9\x84",
4947
      "\xca\x88"         => "\xc6\xae",
4948
      "\xca\x83"         => "\xc6\xa9",
4949
      "\xca\x80"         => "\xc6\xa6",
4950
      "\xc9\xbd"         => "\xe2\xb1\xa4",
4951
      "\xc9\xb5"         => "\xc6\x9f",
4952
      "\xc9\xb2"         => "\xc6\x9d",
4953
      "\xc9\xb1"         => "\xe2\xb1\xae",
4954
      "\xc9\xaf"         => "\xc6\x9c",
4955
      "\xc9\xab"         => "\xe2\xb1\xa2",
4956
      "\xc9\xa9"         => "\xc6\x96",
4957
      "\xc9\xa8"         => "\xc6\x97",
4958
      "\xc9\xa5"         => "\xea\x9e\x8d",
4959
      "\xc9\xa3"         => "\xc6\x94",
4960
      "\xc9\xa0"         => "\xc6\x93",
4961
      "\xc9\x9b"         => "\xc6\x90",
4962
      "\xc9\x99"         => "\xc6\x8f",
4963
      "\xc9\x97"         => "\xc6\x8a",
4964
      "\xc9\x96"         => "\xc6\x89",
4965
      "\xc9\x94"         => "\xc6\x86",
4966
      "\xc9\x93"         => "\xc6\x81",
4967
      "\xc9\x92"         => "\xe2\xb1\xb0",
4968
      "\xc9\x91"         => "\xe2\xb1\xad",
4969
      "\xc9\x90"         => "\xe2\xb1\xaf",
4970
      "\xc9\x8f"         => "\xc9\x8e",
4971
      "\xc9\x8d"         => "\xc9\x8c",
4972
      "\xc9\x8b"         => "\xc9\x8a",
4973
      "\xc9\x89"         => "\xc9\x88",
4974
      "\xc9\x87"         => "\xc9\x86",
4975
      "\xc9\x82"         => "\xc9\x81",
4976
      "\xc9\x80"         => "\xe2\xb1\xbf",
4977
      "\xc8\xbf"         => "\xe2\xb1\xbe",
4978
      "\xc8\xbc"         => "\xc8\xbb",
4979
      "\xc8\xb3"         => "\xc8\xb2",
4980
      "\xc8\xb1"         => "\xc8\xb0",
4981
      "\xc8\xaf"         => "\xc8\xae",
4982
      "\xc8\xad"         => "\xc8\xac",
4983
      "\xc8\xab"         => "\xc8\xaa",
4984
      "\xc8\xa9"         => "\xc8\xa8",
4985
      "\xc8\xa7"         => "\xc8\xa6",
4986
      "\xc8\xa5"         => "\xc8\xa4",
4987
      "\xc8\xa3"         => "\xc8\xa2",
4988
      "\xc8\x9f"         => "\xc8\x9e",
4989
      "\xc8\x9d"         => "\xc8\x9c",
4990
      "\xc8\x9b"         => "\xc8\x9a",
4991
      "\xc8\x99"         => "\xc8\x98",
4992
      "\xc8\x97"         => "\xc8\x96",
4993
      "\xc8\x95"         => "\xc8\x94",
4994
      "\xc8\x93"         => "\xc8\x92",
4995
      "\xc8\x91"         => "\xc8\x90",
4996
      "\xc8\x8f"         => "\xc8\x8e",
4997
      "\xc8\x8d"         => "\xc8\x8c",
4998
      "\xc8\x8b"         => "\xc8\x8a",
4999
      "\xc8\x89"         => "\xc8\x88",
5000
      "\xc8\x87"         => "\xc8\x86",
5001
      "\xc8\x85"         => "\xc8\x84",
5002
      "\xc8\x83"         => "\xc8\x82",
5003
      "\xc8\x81"         => "\xc8\x80",
5004
      "\xc7\xbf"         => "\xc7\xbe",
5005
      "\xc7\xbd"         => "\xc7\xbc",
5006
      "\xc7\xbb"         => "\xc7\xba",
5007
      "\xc7\xb9"         => "\xc7\xb8",
5008
      "\xc7\xb5"         => "\xc7\xb4",
5009
      "\xc7\xb3"         => "\xc7\xb2",
5010
      "\xc7\xaf"         => "\xc7\xae",
5011
      "\xc7\xad"         => "\xc7\xac",
5012
      "\xc7\xab"         => "\xc7\xaa",
5013
      "\xc7\xa9"         => "\xc7\xa8",
5014
      "\xc7\xa7"         => "\xc7\xa6",
5015
      "\xc7\xa5"         => "\xc7\xa4",
5016
      "\xc7\xa3"         => "\xc7\xa2",
5017
      "\xc7\xa1"         => "\xc7\xa0",
5018
      "\xc7\x9f"         => "\xc7\x9e",
5019
      "\xc7\x9d"         => "\xc6\x8e",
5020
      "\xc7\x9c"         => "\xc7\x9b",
5021
      "\xc7\x9a"         => "\xc7\x99",
5022
      "\xc7\x98"         => "\xc7\x97",
5023
      "\xc7\x96"         => "\xc7\x95",
5024
      "\xc7\x94"         => "\xc7\x93",
5025
      "\xc7\x92"         => "\xc7\x91",
5026
      "\xc7\x90"         => "\xc7\x8f",
5027
      "\xc7\x8e"         => "\xc7\x8d",
5028
      "\xc7\x8c"         => "\xc7\x8b",
5029
      "\xc7\x89"         => "\xc7\x88",
5030
      "\xc7\x86"         => "\xc7\x85",
5031
      "\xc6\xbf"         => "\xc7\xb7",
5032
      "\xc6\xbd"         => "\xc6\xbc",
5033
      "\xc6\xb9"         => "\xc6\xb8",
5034
      "\xc6\xb6"         => "\xc6\xb5",
5035
      "\xc6\xb4"         => "\xc6\xb3",
5036
      "\xc6\xb0"         => "\xc6\xaf",
5037
      "\xc6\xad"         => "\xc6\xac",
5038
      "\xc6\xa8"         => "\xc6\xa7",
5039
      "\xc6\xa5"         => "\xc6\xa4",
5040
      "\xc6\xa3"         => "\xc6\xa2",
5041
      "\xc6\xa1"         => "\xc6\xa0",
5042
      "\xc6\x9e"         => "\xc8\xa0",
5043
      "\xc6\x9a"         => "\xc8\xbd",
5044
      "\xc6\x99"         => "\xc6\x98",
5045
      "\xc6\x95"         => "\xc7\xb6",
5046
      "\xc6\x92"         => "\xc6\x91",
5047
      "\xc6\x8c"         => "\xc6\x8b",
5048
      "\xc6\x88"         => "\xc6\x87",
5049
      "\xc6\x85"         => "\xc6\x84",
5050
      "\xc6\x83"         => "\xc6\x82",
5051
      "\xc6\x80"         => "\xc9\x83",
5052
      "\xc5\xbf"         => "\x53",
5053
      "\xc5\xbe"         => "\xc5\xbd",
5054
      "\xc5\xbc"         => "\xc5\xbb",
5055
      "\xc5\xba"         => "\xc5\xb9",
5056
      "\xc5\xb7"         => "\xc5\xb6",
5057
      "\xc5\xb5"         => "\xc5\xb4",
5058
      "\xc5\xb3"         => "\xc5\xb2",
5059
      "\xc5\xb1"         => "\xc5\xb0",
5060
      "\xc5\xaf"         => "\xc5\xae",
5061
      "\xc5\xad"         => "\xc5\xac",
5062
      "\xc5\xab"         => "\xc5\xaa",
5063
      "\xc5\xa9"         => "\xc5\xa8",
5064
      "\xc5\xa7"         => "\xc5\xa6",
5065
      "\xc5\xa5"         => "\xc5\xa4",
5066
      "\xc5\xa3"         => "\xc5\xa2",
5067
      "\xc5\xa1"         => "\xc5\xa0",
5068
      "\xc5\x9f"         => "\xc5\x9e",
5069
      "\xc5\x9d"         => "\xc5\x9c",
5070
      "\xc5\x9b"         => "\xc5\x9a",
5071
      "\xc5\x99"         => "\xc5\x98",
5072
      "\xc5\x97"         => "\xc5\x96",
5073
      "\xc5\x95"         => "\xc5\x94",
5074
      "\xc5\x93"         => "\xc5\x92",
5075
      "\xc5\x91"         => "\xc5\x90",
5076
      "\xc5\x8f"         => "\xc5\x8e",
5077
      "\xc5\x8d"         => "\xc5\x8c",
5078
      "\xc5\x8b"         => "\xc5\x8a",
5079
      "\xc5\x88"         => "\xc5\x87",
5080
      "\xc5\x86"         => "\xc5\x85",
5081
      "\xc5\x84"         => "\xc5\x83",
5082
      "\xc5\x82"         => "\xc5\x81",
5083
      "\xc5\x80"         => "\xc4\xbf",
5084
      "\xc4\xbe"         => "\xc4\xbd",
5085
      "\xc4\xbc"         => "\xc4\xbb",
5086
      "\xc4\xba"         => "\xc4\xb9",
5087
      "\xc4\xb7"         => "\xc4\xb6",
5088
      "\xc4\xb5"         => "\xc4\xb4",
5089
      "\xc4\xb3"         => "\xc4\xb2",
5090
      "\xc4\xb1"         => "\x49",
5091
      "\xc4\xaf"         => "\xc4\xae",
5092
      "\xc4\xad"         => "\xc4\xac",
5093
      "\xc4\xab"         => "\xc4\xaa",
5094
      "\xc4\xa9"         => "\xc4\xa8",
5095
      "\xc4\xa7"         => "\xc4\xa6",
5096
      "\xc4\xa5"         => "\xc4\xa4",
5097
      "\xc4\xa3"         => "\xc4\xa2",
5098
      "\xc4\xa1"         => "\xc4\xa0",
5099
      "\xc4\x9f"         => "\xc4\x9e",
5100
      "\xc4\x9d"         => "\xc4\x9c",
5101
      "\xc4\x9b"         => "\xc4\x9a",
5102
      "\xc4\x99"         => "\xc4\x98",
5103
      "\xc4\x97"         => "\xc4\x96",
5104
      "\xc4\x95"         => "\xc4\x94",
5105
      "\xc4\x93"         => "\xc4\x92",
5106
      "\xc4\x91"         => "\xc4\x90",
5107
      "\xc4\x8f"         => "\xc4\x8e",
5108
      "\xc4\x8d"         => "\xc4\x8c",
5109
      "\xc4\x8b"         => "\xc4\x8a",
5110
      "\xc4\x89"         => "\xc4\x88",
5111
      "\xc4\x87"         => "\xc4\x86",
5112
      "\xc4\x85"         => "\xc4\x84",
5113
      "\xc4\x83"         => "\xc4\x82",
5114
      "\xc4\x81"         => "\xc4\x80",
5115
      "\xc3\xbf"         => "\xc5\xb8",
5116
      "\xc3\xbe"         => "\xc3\x9e",
5117
      "\xc3\xbd"         => "\xc3\x9d",
5118
      "\xc3\xbc"         => "\xc3\x9c",
5119
      "\xc3\xbb"         => "\xc3\x9b",
5120
      "\xc3\xba"         => "\xc3\x9a",
5121
      "\xc3\xb9"         => "\xc3\x99",
5122
      "\xc3\xb8"         => "\xc3\x98",
5123
      "\xc3\xb6"         => "\xc3\x96",
5124
      "\xc3\xb5"         => "\xc3\x95",
5125
      "\xc3\xb4"         => "\xc3\x94",
5126
      "\xc3\xb3"         => "\xc3\x93",
5127
      "\xc3\xb2"         => "\xc3\x92",
5128
      "\xc3\xb1"         => "\xc3\x91",
5129
      "\xc3\xb0"         => "\xc3\x90",
5130
      "\xc3\xaf"         => "\xc3\x8f",
5131
      "\xc3\xae"         => "\xc3\x8e",
5132
      "\xc3\xad"         => "\xc3\x8d",
5133
      "\xc3\xac"         => "\xc3\x8c",
5134
      "\xc3\xab"         => "\xc3\x8b",
5135
      "\xc3\xaa"         => "\xc3\x8a",
5136
      "\xc3\xa9"         => "\xc3\x89",
5137
      "\xc3\xa8"         => "\xc3\x88",
5138
      "\xc3\xa7"         => "\xc3\x87",
5139
      "\xc3\xa6"         => "\xc3\x86",
5140
      "\xc3\xa5"         => "\xc3\x85",
5141
      "\xc3\xa4"         => "\xc3\x84",
5142
      "\xc3\xa3"         => "\xc3\x83",
5143
      "\xc3\xa2"         => "\xc3\x82",
5144
      "\xc3\xa1"         => "\xc3\x81",
5145
      "\xc3\xa0"         => "\xc3\x80",
5146
      "\xc2\xb5"         => "\xce\x9c",
5147
      "\x7a"             => "\x5a",
5148
      "\x79"             => "\x59",
5149
      "\x78"             => "\x58",
5150
      "\x77"             => "\x57",
5151
      "\x76"             => "\x56",
5152
      "\x75"             => "\x55",
5153
      "\x74"             => "\x54",
5154
      "\x73"             => "\x53",
5155
      "\x72"             => "\x52",
5156
      "\x71"             => "\x51",
5157
      "\x70"             => "\x50",
5158
      "\x6f"             => "\x4f",
5159
      "\x6e"             => "\x4e",
5160
      "\x6d"             => "\x4d",
5161
      "\x6c"             => "\x4c",
5162
      "\x6b"             => "\x4b",
5163
      "\x6a"             => "\x4a",
5164
      "\x69"             => "\x49",
5165
      "\x68"             => "\x48",
5166
      "\x67"             => "\x47",
5167
      "\x66"             => "\x46",
5168
      "\x65"             => "\x45",
5169
      "\x64"             => "\x44",
5170
      "\x63"             => "\x43",
5171
      "\x62"             => "\x42",
5172
      "\x61"             => "\x41",
5173
5174
    );
5175
5176
    return $case;
5177
  }
5178
5179
  /**
   * Translate characters or replace substrings.
   *
   * Two call forms are supported:
   *  - strtr($s, $pairs): "$from" is an array of "search => replacement" pairs
   *    that is handed to the native "strtr()" unchanged.
   *  - strtr($s, $from, $to): both strings are split with "self::str_split()" and
   *    the n-th element of "$from" is translated to the n-th element of "$to".
   *    Surplus elements of the longer list are ignored.
   *
   * @param string       $s    The string being translated.
   * @param string|array $from Replacement pairs (two-argument form) or the characters to search for.
   * @param string       $to   [optional] The replacement characters; omit it when "$from" is an array.
   *
   * @return string The translated string; "$s" is returned unchanged when there is nothing to translate.
   */
  public static function strtr($s, $from, $to = INF)
  {
    if (INF === $to) {
      // Two-argument form: the native function only accepts an array of pairs here.
      if (!is_array($from)) {
        trigger_error(__METHOD__ . '(): the second argument must be an array when $to is omitted', E_USER_WARNING);

        return (string)$s;
      }

      return strtr($s, $from);
    }

    $from = self::str_split($from);
    $to = self::str_split($to);

    // Only the positions that exist in both lists form a pair.
    $pairCount = min(count($from), count($to));

    if ($pairCount === 0) {
      // array_combine() rejects empty arrays on PHP < 5.4, so skip it entirely.
      return (string)$s;
    }

    $pairs = array_combine(
        array_slice($from, 0, $pairCount),
        array_slice($to, 0, $pairCount)
    );

    return strtr($s, $pairs);
  }
5207
5208
  /**
   * Compare a slice of one string with another string, optionally ignoring case.
   *
   * The slice of "$main_str" is taken with "self::substr()"; "$str" is then cut down to the
   * length of that slice (measured with "self::strlen()") before both parts are compared.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
   *                                    end of the string.
   * @param int     $length             The maximum length of the slice taken from main_str.
   * @param boolean $case_insensitivity If TRUE, the parts are compared with "self::strcasecmp()",
   *                                    otherwise with "self::strcmp()".
   *
   * @return int The result of the comparison method that was used.
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5228
5229
  /**
   * Case-insensitive string comparison.
   *
   * Both strings are passed through "self::strtocasefold()" and the results are
   * compared with "self::strcmp()".
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5241
5242
  /**
   * Uppercase the first character of every word in the string.
   *
   * Words are separated by a single space (" ") only. Each word that is not listed in
   * "$exceptions" is passed through "self::ucfirst()". The joined result is passed through
   * "self::ucfirst()" once more, so the very first word is processed even if it is an exception.
   *
   * @param string $string     The input string.
   * @param array  $exceptions Words that must be kept as they are (compared strictly, case-sensitive).
   *
   * @return string The converted string, or an empty string for empty input.
   */
  public static function ucwords($string, $exceptions = array())
  {
    $string = (string)$string;

    // Check for a first byte instead of truthiness, otherwise the valid input "0" would be dropped.
    if (!isset($string[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (explode(' ', $string) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
5283
5284
  /**
   * Format a number with grouped thousands, allowing multi-byte separators.
   *
   * When one of the separators is longer than one byte, the number is formatted with the
   * single-byte defaults "." and "," first and both placeholders are then swapped for the
   * requested separators. This gives the same result on every PHP version, including those
   * whose native "number_format()" only honours the first byte of a separator.
   *
   * @param float  $number        The number being formatted.
   * @param int    $decimals      The number of decimal digits.
   * @param string $dec_point     The separator for the decimal point.
   * @param string $thousands_sep The thousands separator.
   *
   * @return string The formatted number.
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    // isset($x[1]) is true as soon as the separator has a second byte.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() with an array replaces both placeholders in one pass, so a "." or ","
      // that is part of a separator is never replaced a second time.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
5314
5315
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: this is only a thin wrapper around the native "str_replace()"; all arguments
   * are forwarded unchanged and the native result is returned as-is.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  The value being searched for (the needle). An array may be used
   *                       to designate multiple needles.
   * @param mixed $replace The replacement value for found search values. An array may be
   *                       used to designate multiple replacements.
   * @param mixed $subject The string or array being searched and replaced on (the haystack).
   *                       If subject is an array, the replacement is performed on every
   *                       entry and an array is returned.
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed A string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
5349
5350
  /**
   * Case-insensitive version of "str_replace()", implemented with "preg_replace()".
   *
   * Every needle is quoted with "preg_quote()" and matched with the "u" (UTF-8) and
   * "i" (case-insensitive) modifiers. An empty needle is turned into a pattern that can
   * never match. Replacements are inserted literally: "$" and "\" have no special meaning.
   *
   * @param string|array $search  The value(s) being searched for.
   * @param string|array $replace The replacement value(s).
   * @param string|array $subject The string or array being searched and replaced on.
   * @param null|int     $count   [optional] If passed, this will hold the number of replacements performed.
   *
   * @return string|array|null The result of "preg_replace()" for the given subject.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();

    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // "^" followed by a look-behind for any character can never match.
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // Escape "\" and "$" so that preg_replace() does not treat parts of the
    // replacement as back-references (e.g. "$1" or "\1").
    $escapeMap = array(
        '\\' => '\\\\',
        '$'  => '\\$',
    );

    if (is_array($replace)) {
      foreach ($replace as $key => $value) {
        $replace[$key] = strtr((string)$value, $escapeMap);
      }
    } else {
      $replace = strtr((string)$replace, $escapeMap);
    }

    $replacements = 0;
    $subject = preg_replace($patterns, $replace, $subject, -1, $replacements);
    $count = $replacements;

    return $subject;
  }
5378
5379
  /**
   * Make the first character of a string lowercase.
   *
   * The first character (taken with "self::substr()") is passed through
   * "self::strtolower()" and the rest of the string is appended unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function lcfirst($str)
  {
    $loweredHead = self::strtolower(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $loweredHead . $tail;
  }
5390
5391
  /**
   * Find the position of the last occurrence of a case-insensitive string.
   *
   * Both strings are passed through "self::strtolower()" and the search itself is
   * delegated to "self::strrpos()".
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int|false The value returned by "self::strrpos()", which is false when the haystack or the needle
   *                     is empty.
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $loweredHaystack = self::strtolower($haystack);
    $loweredNeedle = self::strtolower($needle);

    return self::strrpos($loweredHaystack, $loweredNeedle, $offset);
  }
5404
5405
  /**
   * Find position of last occurrence of a string in a string
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked, for the last occurrence
   *                              of needle
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack. A non-negative integer is
   *                              converted with self::chr() before searching.
   *                              </p>
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point
   *                              prior to the end of the string.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false the numeric position of
   * the last occurrence of needle in the
   * haystack string. If
   * needle is not found (or haystack / needle is empty), it returns false.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    // An integer needle has to be detected *before* the string cast below,
    // otherwise the check can never be true and the conversion is dead code.
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);

      // cleaning may have removed every byte of the needle
      if (!isset($needle[0])) {
        return false;
      }
    }

    if (self::$support['mbstring'] === true) {
      return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // iconv_strrpos() has no offset parameter, so it is only usable for offset 0;
    // other offsets are handled by the byte-based fallback below
    if (self::$support['iconv'] === true && $offset === 0) {
      return iconv_strrpos($haystack, $needle, 'UTF-8');
    }

    // fallback

    if ($offset > 0) {
      // skip the first $offset characters
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // ignore the last |$offset| characters
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // translate the byte position into a character position and
    // add back the characters skipped by a positive offset
    $left = substr($haystack, 0, $bytePos);

    return ($offset > 0 ? $offset : 0) + self::strlen($left);
  }
5478
5479
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from self::split() and are glued together with implode(),
   * so the line ending is placed between chunks only and is not appended after
   * the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk
   * @param    string $end      The character(s) inserted between two chunks
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
5493
5494
  /**
   * Converts the input to ISO-8859.
   *
   * This is only an alias: the work is done by "UTF8::to_win1252()".
   *
   * @param   string $text
   *
   * @return  array|string The return value of "UTF8::to_win1252()"
   */
  public static function to_iso8859($text)
  {
    $converted = self::to_win1252($text);

    return $converted;
  }
5507
5508
  /**
   * Fixes UTF-8 strings containing wrongly converted Windows-1252 characters.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as if it was ISO-8859-1
   * (ignoring the Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to "UTF8::fix_simple_utf8()".
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $string
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($string)
  {
    $fixed = self::fix_simple_utf8($string);

    return $fixed;
  }
5525
5526
  /**
   * Returns the class-level table of Unicode white space characters.
   *
   * @return   array An array with numeric code point as key and White Space Character as value
   */
  public static function ws()
  {
    $whitespaceTable = self::$whitespace;

    return $whitespaceTable;
  }
5535
5536
  /**
   * Parses the string into variables
   *
   * WARNING: This differs from parse_str() by always storing the results in
   *    $result instead of placing them in the local scope!
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string; it is passed through self::filter() before parsing.
   *                        </p>
   * @param array  $result  <p>
   *                        Receives the parsed variables as array elements.
   *                        </p>
   *
   * @return bool The return value of mb_parse_str(), so callers can detect a failed parse.
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    $str = self::filter($str);

    // hand the success flag back instead of silently discarding it
    return mb_parse_str($str, $result);
  }
5563
5564
  /**
   * Get the writing direction of a specific character.
   *
   * The character is decoded with static::chr_to_decimal() and the resulting
   * code point is looked up in the hard-coded tables below; everything that is
   * not listed there is reported as left-to-right.
   *
   * @param   string $chr Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($chr)
  {
    // single code points that are right-to-left (matched strictly)
    static $rtlCodePoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6, 0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa, 0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // inclusive [first, last] ranges of right-to-left code points
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    $codePoint = static::chr_to_decimal($chr);

    // nothing outside of this window is listed in the tables
    if (0x5be > $codePoint || 0x10b7f < $codePoint) {
      return 'LTR';
    }

    if (in_array($codePoint, $rtlCodePoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $codePoint && $range[1] >= $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
5663
5664
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the sequence (1 to 4); its
   * payload bits are then combined with 6 bits from each following byte.
   *
   * @param   string $chr The input character
   *
   * @return  int
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;
    $code = self::ord($chr[0]);

    // 0xxxxxxx: single byte, the value is the code itself
    if (($code & 0x80) === 0) {
      return $code;
    }

    // array(mask, lead-byte marker, sequence length)
    $leadByteLayouts = array(
        array(0xe0, 0xc0, 2), // 110xxxxx
        array(0xf0, 0xe0, 3), // 1110xxxx
        array(0xf8, 0xf0, 4), // 11110xxx
    );

    $length = 1;
    foreach ($leadByteLayouts as $layout) {
      if (($code & $layout[0]) === $layout[1]) {
        $length = $layout[2];
        // strip the marker bits, keep the payload
        $code &= ~$layout[1];
        break;
      }
    }

    for ($offset = 1; $offset < $length; ++$offset) {
      // 10xxxxxx: shift in the payload of the next byte
      $code = ($code << 6) + (self::ord($chr[$offset]) & ~0x80);
    }

    return $code;
  }
5703
5704
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal numeric HTML entity, which
   * mb_convert_encoding() then converts from "HTML-ENTITIES" to UTF-8.
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
5721
5722
  /**
   * Returns the lookup table "urlencoded Windows-1252 byte" -> UTF-8 character.
   *
   * The table covers %20 to %FF in ascending order. Bytes that are undefined in
   * Windows-1252 (%81, %8D, %8F, %90, %9D) map to an empty string. Invisible
   * characters (DEL, NO-BREAK SPACE, SOFT HYPHEN) are written as explicit byte
   * escapes so that they cannot get lost when the file is edited.
   *
   * @return array
   */
  protected static function urldecode_fix_win1252_chars()
  {
    static $map = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7f", // DELETE (control character)
        '%80' => '€', // EURO SIGN (was wrongly mapped to a backtick, which is %60)
        '%81' => '', // undefined in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // undefined in Windows-1252
        '%8E' => 'Ž',
        '%8F' => '', // undefined in Windows-1252
        '%90' => '', // undefined in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // undefined in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xc2\xa0", // NO-BREAK SPACE
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xc2\xad", // SOFT HYPHEN
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $map;
  }
5958
5959
}
5960