Completed
Push — master ( 37c3d7...e42a7d )
by Lars
03:50
created

UTF8::normalizeEncoding()   B

Complexity

Conditions 2
Paths 2

Size

Total Lines 24
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2.0185

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 24
ccs 5
cts 6
cp 0.8333
rs 8.9713
cc 2
eloc 16
nc 2
nop 1
crap 2.0185
1
<?php
2
3
namespace voku\helper;
4
5
use Patchwork\PHP\Shim\Intl;
6
use Patchwork\PHP\Shim\Normalizer;
7
use Patchwork\PHP\Shim\Xml;
8
9
/**
10
 * UTF8-Helper-Class
11
 *
12
 * @package voku\helper
13
 */
14
class UTF8
15
{
16
  /**
17
   * @var array
18
   */
19
  protected static $win1252ToUtf8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
  );
48
49
  /**
50
   * @var array
51
   */
52
  protected static $cp1252ToUtf8 = array(
53
      '€' => '€',
54
      '‚' => '‚',
55
      'ƒ' => 'ƒ',
56
      '„' => '„',
57
      '…' => '…',
58
      '†' => '†',
59
      '‡' => '‡',
60
      'ˆ' => 'ˆ',
61
      '‰' => '‰',
62
      'Š' => 'Š',
63
      '‹' => '‹',
64
      'Œ' => 'Œ',
65
      'Ž' => 'Ž',
66
      '‘' => '‘',
67
      '’' => '’',
68
      '“' => '“',
69
      '”' => '”',
70
      '•' => '•',
71
      '–' => '–',
72
      '—' => '—',
73
      '˜' => '˜',
74
      '™' => '™',
75
      'š' => 'š',
76
      '›' => '›',
77
      'œ' => 'œ',
78
      'ž' => 'ž',
79
      'Ÿ' => 'Ÿ',
80
  );
81
82
  /**
83
   * Numeric code point => UTF-8 Character
84
   *
85
   * @var array
86
   */
87
  protected static $whitespace = array(
88
      0     => "\x0",
89
      //NUL Byte
90
      9     => "\x9",
91
      //Tab
92
      10    => "\xa",
93
      //New Line
94
      11    => "\xb",
95
      //Vertical Tab
96
      13    => "\xd",
97
      //Carriage Return
98
      32    => "\x20",
99
      //Ordinary Space
100
      160   => "\xc2\xa0",
101
      //NO-BREAK SPACE
102
      5760  => "\xe1\x9a\x80",
103
      //OGHAM SPACE MARK
104
      6158  => "\xe1\xa0\x8e",
105
      //MONGOLIAN VOWEL SEPARATOR
106
      8192  => "\xe2\x80\x80",
107
      //EN QUAD
108
      8193  => "\xe2\x80\x81",
109
      //EM QUAD
110
      8194  => "\xe2\x80\x82",
111
      //EN SPACE
112
      8195  => "\xe2\x80\x83",
113
      //EM SPACE
114
      8196  => "\xe2\x80\x84",
115
      //THREE-PER-EM SPACE
116
      8197  => "\xe2\x80\x85",
117
      //FOUR-PER-EM SPACE
118
      8198  => "\xe2\x80\x86",
119
      //SIX-PER-EM SPACE
120
      8199  => "\xe2\x80\x87",
121
      //FIGURE SPACE
122
      8200  => "\xe2\x80\x88",
123
      //PUNCTUATION SPACE
124
      8201  => "\xe2\x80\x89",
125
      //THIN SPACE
126
      8202  => "\xe2\x80\x8a",
127
      //HAIR SPACE
128
      8232  => "\xe2\x80\xa8",
129
      //LINE SEPARATOR
130
      8233  => "\xe2\x80\xa9",
131
      //PARAGRAPH SEPARATOR
132
      8239  => "\xe2\x80\xaf",
133
      //NARROW NO-BREAK SPACE
134
      8287  => "\xe2\x81\x9f",
135
      //MEDIUM MATHEMATICAL SPACE
136
      12288 => "\xe3\x80\x80"
137
      //IDEOGRAPHIC SPACE
138
  );
139
140
  /**
141
   * @var array
142
   */
143
  protected static $whitespaceTable = array(
144
      'SPACE'                     => "\x20",
145
      'NO-BREAK SPACE'            => "\xc2\xa0",
146
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
147
      'EN QUAD'                   => "\xe2\x80\x80",
148
      'EM QUAD'                   => "\xe2\x80\x81",
149
      'EN SPACE'                  => "\xe2\x80\x82",
150
      'EM SPACE'                  => "\xe2\x80\x83",
151
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
152
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
153
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
154
      'FIGURE SPACE'              => "\xe2\x80\x87",
155
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
156
      'THIN SPACE'                => "\xe2\x80\x89",
157
      'HAIR SPACE'                => "\xe2\x80\x8a",
158
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
159
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
160
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
161
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  protected static $commonCaseFold = array(
168
      'ſ'            => 's',
169
      "\xCD\x85"     => 'ι',
170
      'ς'            => 'σ',
171
      "\xCF\x90"     => 'β',
172
      "\xCF\x91"     => 'θ',
173
      "\xCF\x95"     => 'φ',
174
      "\xCF\x96"     => 'π',
175
      "\xCF\xB0"     => 'κ',
176
      "\xCF\xB1"     => 'ρ',
177
      "\xCF\xB5"     => 'ε',
178
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
179
      "\xE1\xBE\xBE" => 'ι',
180
  );
181
182
  /**
183
   * @var array
184
   */
185
  protected static $brokenUtf8ToUtf8 = array(
186
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
187
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
188
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
189
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
190
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
191
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
192
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
193
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
194
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
195
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
196
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
197
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
198
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
199
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
200
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
201
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
202
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
203
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
204
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
205
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
206
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
207
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
208
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
209
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
210
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
211
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
212
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
213
      'ü'       => 'ü',
214
      'ä'       => 'ä',
215
      'ö'       => 'ö',
216
      'Ö'       => 'Ö',
217
      'ß'       => 'ß',
218
      'Ã '       => 'à',
219
      'á'       => 'á',
220
      'â'       => 'â',
221
      'ã'       => 'ã',
222
      'ù'       => 'ù',
223
      'ú'       => 'ú',
224
      'û'       => 'û',
225
      'Ù'       => 'Ù',
226
      'Ú'       => 'Ú',
227
      'Û'       => 'Û',
228
      'Ü'       => 'Ü',
229
      'ò'       => 'ò',
230
      'ó'       => 'ó',
231
      'ô'       => 'ô',
232
      'è'       => 'è',
233
      'é'       => 'é',
234
      'ê'       => 'ê',
235
      'ë'       => 'ë',
236
      'À'       => 'À',
237
      'Á'       => 'Á',
238
      'Â'       => 'Â',
239
      'Ã'       => 'Ã',
240
      'Ä'       => 'Ä',
241
      'Ã…'       => 'Å',
242
      'Ç'       => 'Ç',
243
      'È'       => 'È',
244
      'É'       => 'É',
245
      'Ê'       => 'Ê',
246
      'Ë'       => 'Ë',
247
      'ÃŒ'       => 'Ì',
248
      'Í'       => 'Í',
249
      'ÃŽ'       => 'Î',
250
      'Ï'       => 'Ï',
251
      'Ñ'       => 'Ñ',
252
      'Ã’'       => 'Ò',
253
      'Ó'       => 'Ó',
254
      'Ô'       => 'Ô',
255
      'Õ'       => 'Õ',
256
      'Ø'       => 'Ø',
257
      'Ã¥'       => 'å',
258
      'æ'       => 'æ',
259
      'ç'       => 'ç',
260
      'ì'       => 'ì',
261
      'í'       => 'í',
262
      'î'       => 'î',
263
      'ï'       => 'ï',
264
      'ð'       => 'ð',
265
      'ñ'       => 'ñ',
266
      'õ'       => 'õ',
267
      'ø'       => 'ø',
268
      'ý'       => 'ý',
269
      'ÿ'       => 'ÿ',
270
      '€'      => '€',
271
  );
272
273
  /**
274
   * @var array
275
   */
276
  protected static $utf8ToWin1252 = array(
277
      "\xe2\x82\xac" => "\x80", // EURO SIGN
278
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
279
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
280
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
281
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
282
      "\xe2\x80\xa0" => "\x86", // DAGGER
283
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
284
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
285
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
286
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
287
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
288
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
289
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
290
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
291
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
292
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
293
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
294
      "\xe2\x80\xa2" => "\x95", // BULLET
295
      "\xe2\x80\x93" => "\x96", // EN DASH
296
      "\xe2\x80\x94" => "\x97", // EM DASH
297
      "\xcb\x9c"     => "\x98", // SMALL TILDE
298
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
299
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
300
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
301
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
302
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
303
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
304
  );
305
306
  /**
307
   * @var array
308
   */
309
  protected static $utf8MSWord = array(
310
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
311
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
312
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
313
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
314
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
315
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
316
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
317
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
318
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
319
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
320
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
321
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
322
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
323
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
324
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
325
  );
326
327
  /**
328
   * @var array
329
   */
330
  private static $support = array();
331
332
  /**
333
   * __construct()
334
   */
335 1
  public function __construct()
  {
    // Creating an instance simply triggers the (guarded) environment check;
    // the constructor keeps no state of its own.
    self::checkForSupport();
  }
339
340
  /**
341
   * check for UTF8-Support
342
   */
343 151
  public static function checkForSupport()
  {
    // The "mbstring" entry doubles as the "already initialised" marker:
    // once it is set, every later call returns immediately.
    if (isset(self::$support['mbstring'])) {
      return;
    }

    // Probe the environment and remember the result per feature.
    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();

    // Enables the portability layer and configures PHP for UTF-8.
    Bootup::initAll();

    // Redirects to a UTF-8 encoded URL if that is not already the case.
    Bootup::filterRequestUri();

    // Normalizes HTTP inputs to UTF-8 NFC.
    Bootup::filterRequestInputs();
  }
357
358
  /**
359
   * checks whether mbstring is available on the server
360
   *
361
   * @return   bool True if available, False otherwise
362
   */
363 2
  public static function mbstring_loaded()
  {
    // Without the extension there is nothing to configure.
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    // Side effect: switch mbstring's internal encoding to UTF-8.
    mb_internal_encoding('UTF-8');

    return true;
  }
373
374
  /**
375
   * checks whether iconv is available on the server
376
   *
377
   * @return   bool True if available, False otherwise
378
   */
379 1
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean; compare instead of mapping it through a ternary.
    $available = extension_loaded('iconv');

    return $available === true;
  }
383
384
  /**
385
   * checks whether intl is available on the server
386
   *
387
   * @return   bool True if available, False otherwise
388
   */
389 1
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean; compare instead of mapping it through a ternary.
    $available = extension_loaded('intl');

    return $available === true;
  }
393
394
  /**
395
   * checks if \u modifier is available that enables Unicode support in PCRE.
396
   *
397
   * @return   bool True if support is available, false otherwise
398
   */
399 30
  public static function pcre_utf8_support()
  {
    // Try an empty pattern with the "u" modifier; the silence operator hides
    // any warning raised when the modifier is not supported.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    // Collapse the int|false result into a plain boolean.
    return $probe ? true : false;
  }
404
405
  /**
406
   * alias for "UTF8::to_ascii()"
407
   *
408
   * @param string $s The input string e.g. a UTF-8 String
409
   * @param string $subst_chr
410
   *
411
   * @return string
412
   */
413 6
  public static function toAscii($s, $subst_chr = '?')
  {
    // camelCase alias: both arguments are forwarded untouched to to_ascii().
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
417
418
  /**
419
   * convert to ASCII
420
   *
421
   * @param string $s The input string e.g. a UTF-8 String
422
   * @param string $subst_chr
423
   *
424
   * @return string
425
   */
426 7
  public static function to_ascii($s, $subst_chr = '?')
  {
    // Extra transliteration map, loaded on first use and shared across calls.
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // No byte >= 0x80 present: the cleaned string is returned as it is.
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = Normalizer::normalize($s, Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // Split into single characters. The "s" modifier makes "." match line
    // breaks as well; without it every "\n" was left out of the match list
    // and therefore vanished from the re-assembled result.
    preg_match_all('/./us', $s, $matches);
    $chars = $matches[0];

    /** @noinspection AlterInForeachInspection */
    foreach ($chars as &$c) {

      // single-byte characters are kept unchanged
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if ($t !== false && is_string($t)) {
          if (!isset($t[0])) {
            // iconv produced nothing: mark the character as unknown
            $t = '?';
          } elseif (isset($t[1])) {
            // multi-character result: strip leading accent-like marks
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          // Decompose and keep the first byte if it is ASCII,
          // otherwise fall back to the substitution character.
          $t = Normalizer::normalize($c, Normalizer::NFD);

          if ($t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      // still unknown: last resort is the table-based transliteration
      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // Break the reference so the last element cannot be overwritten by accident.
    unset($c);

    return implode('', $chars);
  }
497
498
  /**
499
   * accepts a string and removes all non-UTF-8 characters from it.
500
   *
501
   * @param string $str                     The string to be sanitized.
502
   * @param bool   $remove_bom
503
   * @param bool   $normalize_whitespace
504
   * @param bool   $normalize_msword        e.g.: "…" => "..."
505
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
506
   *
507
   * @return string Clean UTF-8 encoded string
508
   */
509 24
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Runs of well-formed UTF-8 sequences are captured in group 1 and written
    // back; any other single byte matches the trailing "." and is replaced by
    // the (then empty) group 1, i.e. removed.
    // The three quadruple-byte alternatives cover U+10000 - U+10FFFF; they were
    // missing before, so valid 4-byte characters were deleted as if invalid.
    $regx = '/
       (
        (?: [\x00-\x7F]                    # single-byte sequences    0xxxxxxx
        |   [\xC2-\xDF][\x80-\xBF]         # double-byte sequences    110xxxxx 10xxxxxx
        |   \xE0[\xA0-\xBF][\x80-\xBF]     # triple-byte sequences    1110xxxx 10xxxxxx * 2
        |   [\xE1-\xEC][\x80-\xBF]{2}
        |   \xED[\x80-\x9F][\x80-\xBF]
        |   [\xEE-\xEF][\x80-\xBF]{2}
        |   \xF0[\x90-\xBF][\x80-\xBF]{2}  # quadruple-byte sequences 11110xxx 10xxxxxx * 3
        |   [\xF1-\xF3][\x80-\xBF]{3}
        |   \xF4[\x80-\x8F][\x80-\xBF]{2}
        ){1,50}                            # ...one or more times
       )
       | .                                 # anything else
       /x';
    $str = preg_replace($regx, '$1', $str);

    // Always applied: drop replacement characters and invisible control characters.
    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    // Optional steps, each enabled only by a strict boolean true.
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
545
546
  /**
547
   * replace diamond question mark (�)
548
   *
549
   * @param string $str
550
   * @param string $unknown
551
   *
552
   * @return string
553
   */
554 25
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // The replacement character, once as byte escapes and once as a literal.
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // A single replacement string is applied to every needle.
    return str_replace($needles, $unknown, $str);
  }
568
569
  /**
570
   * Remove Invisible Characters
571
   *
572
   * This prevents sandwiching null characters
573
   * between ascii characters, like Java\0script.
574
   *
575
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
576
   *
577
   * @param  string $str
578
   * @param  bool   $url_encoded
579
   *
580
   * @return  string
581
   */
582 25
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // The "i" modifier is required: percent-encoded hex digits may be written
      // in upper or lower case, so "%0B" must be caught just like "%0b".
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until a pass removes nothing, so sequences that only appear
    // after an earlier removal (e.g. "%%0b0b") are stripped as well.
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
602
603
  /**
604
   * Normalize the whitespace.
605
   *
606
   * @param string $str                  The string to be normalized.
607
   * @param bool   $keepNonBreakingSpace Set to true, to keep non-breaking-spaces.
608
   *
609
   * @return string
610
   */
611 8
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false)
  {
    // Search lists, cached per mode: slot 1 = keep NBSP, slot 0 = replace NBSP.
    static $whitespaces = array();

    // Only a strict boolean true keeps the non-breaking space (unchanged
    // contract). The cache slot is derived from that very test; keying the
    // cache by the raw argument let 1 and true share a slot although they
    // select different lists.
    $cacheKey = ($keepNonBreakingSpace === true) ? 1 : 0;

    if (!isset($whitespaces[$cacheKey])) {

      $table = self::$whitespaceTable;

      if ($cacheKey === 1) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    // Every listed whitespace character becomes an ordinary space.
    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
629
630
  /**
631
   * Returns an array with all utf8 whitespace characters.
632
   *
633
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
634
   *
635
   * @author: Derek E. [email protected]
636
   *
637
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
638
   *         as defined in above URL
639 1
   */
640
  public static function whitespace_table()
  {
    // Hand out a copy of the name => character map held by the class.
    $table = self::$whitespaceTable;

    return $table;
  }
644
645
  /**
646
   * Normalize MS Word special characters.
647
   *
648
   * @param string $str The string to be normalized.
649
   *
650
   * @return string
651 2
   */
652
  public static function normalize_msword($str)
  {
    // Search / replace lists are built once from the class map and reused.
    static $search = null;
    static $replace = null;

    if (null === $search) {
      $search = array();
      $replace = array();

      foreach (self::$utf8MSWord as $from => $to) {
        $search[] = $from;
        $replace[] = $to;
      }
    }

    return str_replace($search, $replace, $str);
  }
664
665
  /**
666
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
667
   *
668
   * @param string $str
669
   *
670
   * @return string
671 7
   */
672
  public static function removeBOM($str = '')
  {
    $str = (string)$str;

    // Checked in this order, one after the other (same sequence as before).
    // The 4-byte UTF-32 marks must come before the 2-byte UTF-16 marks
    // because UTF-32 (LE) starts with the UTF-16 (LE) mark.
    $boms = array(
        "\x00\x00\xfe\xff", // UTF-32 (BE)
        "\xff\xfe\x00\x00", // UTF-32 (LE)
        "\xef\xbb\xbf",     // UTF-8
        "\xfe\xff",         // UTF-16 (BE)
        "\xff\xfe",         // UTF-16 (LE)
    );

    foreach ($boms as $bom) {
      $length = strlen($bom);

      // Binary-safe, strict prefix test.
      if (strncmp($str, $bom, $length) === 0) {
        // The cast guarantees a string: older PHP versions return false from
        // substr() when nothing is left after the mark.
        $str = (string)substr($str, $length);
      }
    }

    return $str;
  }
712 2
713 2
  /**
714
   * get data ...
715
   *
716
   * @param string $file
717
   *
718
   * @return bool|string|array|int false on error
719
   */
720
  protected static function getData($file)
  {
    // Data files sit next to this class as "data/<name>.ser".
    $path = __DIR__ . '/data/' . $file . '.ser';

    // A missing file is reported as false.
    if (!file_exists($path)) {
      return false;
    }

    return unserialize(file_get_contents($path));
  }
729
730
  /**
731
   * US-ASCII transliterations of Unicode text.
732
   *
733
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
734
   * Warning: you should only pass this well formed UTF-8!
735
   * Be aware it works by making a copy of the input string which it appends transliterated
736 8
   * characters to - it uses a PHP output buffer to do this - it means, memory use will increase,
737
   * requiring up to the same amount again as the input string
738 8
   *
739
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
740 8
   *
741
   * @author <[email protected]>
742 8
   *
743 2
   * @param string $str     UTF-8 string to convert
744
   * @param string $unknown Character use if character unknown. (default is ?)
745
   *
746 7
   * @return string US-ASCII string
747
   */
748 7
  public static function str_transliterate($str, $unknown = '?')
  {
    // Cache of the transliteration banks loaded so far, keyed by bank number ($ord >> 8).
    // It is static so every bank file is included at most once per request.
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if (!is_array($UTF8_TO_ASCII)) {
      $UTF8_TO_ASCII = array();
    }

    $str = self::clean($str);

    // Split into single characters; if the match fails there is simply nothing to convert.
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = isset($ar[0]) ? $ar[0] : array();

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Work out the sequence length and the value to subtract from the lead byte.
      // 128..191 is a stray continuation byte and 254/255 never occur in UTF-8,
      // so neither can start a character.
      if ($ordC0 >= 254 || $ordC0 < 192) {
        $seqLen = 0;
        $leadBase = 0;
      } elseif ($ordC0 >= 252) {
        $seqLen = 6;
        $leadBase = 252;
      } elseif ($ordC0 >= 248) {
        $seqLen = 5;
        $leadBase = 248;
      } elseif ($ordC0 >= 240) {
        $seqLen = 4;
        $leadBase = 240;
      } elseif ($ordC0 >= 224) {
        $seqLen = 3;
        $leadBase = 224;
      } else {
        $seqLen = 2;
        $leadBase = 192;
      }

      // Undecodable or truncated sequence: never fall back to the code point of a
      // previous character, just emit the placeholder.
      if ($seqLen === 0 || strlen($c) < $seqLen) {
        $c = $unknown;
        continue;
      }

      // Decode the code point: each continuation byte contributes its low 6 bits.
      // The value is rebuilt from scratch for every character.
      $ord = $ordC0 - $leadBase;
      for ($i = 1; $i < $seqLen; $i++) {
        $ord = $ord * 64 + (ord($c[$i]) - 128);
      }

      // Lazily load the bank (256 code points) that contains this character.
      $bank = $ord >> 8;
      if (!array_key_exists($bank, $UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // The bank file is expected to populate $UTF8_TO_ASCII[$bank] in this scope.
          /** @noinspection PhpIncludeInspection */
          include $bankfile;
        }

        // Bank file missing, or it did not define the table: remember an empty bank
        // so the lookup below is always safe and the file is not probed again.
        if (!isset($UTF8_TO_ASCII[$bank]) || !is_array($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // Break the reference so later writes to $c cannot touch the last array element.
    unset($c);

    return implode('', $chars);
  }
840
841
  /**
842
   * Echo native UTF8-Support libs ... e.g. for debugging
843
   */
844
  public static function showSupport()
  {
    // init: populate the support table before dumping it, the same way
    // substr(), split() and str_split() do before they read self::$support
    self::checkForSupport();

    // One entry per line, separated by a newline plus an HTML line break.
    foreach (self::$support as $utf8Support) {
      echo $utf8Support . "\n<br>";
    }
  }
850
851
  /**
852
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
853
   *
854
   * @link http://php.net/manual/en/function.htmlentities.php
855
   *
856
   * @param string $string        <p>
857
   *                              The input string.
858
   *                              </p>
859
   * @param int    $flags         [optional] <p>
860
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
861
   *                              invalid code unit sequences and the used document type. The default is
862
   *                              ENT_COMPAT | ENT_HTML401.
863
   *                              <table>
864
   *                              Available <i>flags</i> constants
865
   *                              <tr valign="top">
866
   *                              <td>Constant Name</td>
867
   *                              <td>Description</td>
868
   *                              </tr>
869
   *                              <tr valign="top">
870
   *                              <td><b>ENT_COMPAT</b></td>
871
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
872
   *                              </tr>
873
   *                              <tr valign="top">
874
   *                              <td><b>ENT_QUOTES</b></td>
875
   *                              <td>Will convert both double and single quotes.</td>
876
   *                              </tr>
877
   *                              <tr valign="top">
878
   *                              <td><b>ENT_NOQUOTES</b></td>
879
   *                              <td>Will leave both double and single quotes unconverted.</td>
880
   *                              </tr>
881
   *                              <tr valign="top">
882
   *                              <td><b>ENT_IGNORE</b></td>
883
   *                              <td>
884
   *                              Silently discard invalid code unit sequences instead of returning
885
   *                              an empty string. Using this flag is discouraged as it
886
   *                              may have security implications.
887
   *                              </td>
888
   *                              </tr>
889
   *                              <tr valign="top">
890
   *                              <td><b>ENT_SUBSTITUTE</b></td>
891
   *                              <td>
892
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
893
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
894
   *                              </td>
895
   *                              </tr>
896
   *                              <tr valign="top">
897
   *                              <td><b>ENT_DISALLOWED</b></td>
898
   *                              <td>
899
   *                              Replace invalid code points for the given document type with a
900
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
901
   *                              (otherwise) instead of leaving them as is. This may be useful, for
902
   *                              instance, to ensure the well-formedness of XML documents with
903
   *                              embedded external content.
904
   *                              </td>
905
   *                              </tr>
906
   *                              <tr valign="top">
907
   *                              <td><b>ENT_HTML401</b></td>
908
   *                              <td>
909
   *                              Handle code as HTML 4.01.
910
   *                              </td>
911
   *                              </tr>
912
   *                              <tr valign="top">
913
   *                              <td><b>ENT_XML1</b></td>
914
   *                              <td>
915
   *                              Handle code as XML 1.
916
   *                              </td>
917
   *                              </tr>
918
   *                              <tr valign="top">
919
   *                              <td><b>ENT_XHTML</b></td>
920
   *                              <td>
921
   *                              Handle code as XHTML.
922
   *                              </td>
923
   *                              </tr>
924
   *                              <tr valign="top">
925
   *                              <td><b>ENT_HTML5</b></td>
926
   *                              <td>
927
   *                              Handle code as HTML 5.
928
   *                              </td>
929
   *                              </tr>
930
   *                              </table>
931
   *                              </p>
932
   * @param string $encoding      [optional] <p>
933
   *                              Like <b>htmlspecialchars</b>,
934
   *                              <b>htmlentities</b> takes an optional third argument
935
   *                              <i>encoding</i> which defines encoding used in
936
   *                              conversion.
937
   *                              Although this argument is technically optional, you are highly
938
   *                              encouraged to specify the correct value for your code.
939
   *                              </p>
940
   * @param bool   $double_encode [optional] <p>
941
   *                              When <i>double_encode</i> is turned off PHP will not
942
   *                              encode existing html entities. The default is to convert everything.
943
   *                              </p>
944 2
   *
945
   *
946 2
   * @return string the encoded string.
947
   * </p>
948
   * <p>
949
   * If the input <i>string</i> contains an invalid code unit
950
   * sequence within the given <i>encoding</i> an empty string
951
   * will be returned, unless either the <b>ENT_IGNORE</b> or
952
   * <b>ENT_SUBSTITUTE</b> flags are set.
953
   */
954
  public static function htmlentities($string, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Delegates to PHP's native htmlentities(); this wrapper only fixes the
    // default values of $flags (ENT_COMPAT) and $encoding (UTF-8).
    $encoded = htmlentities($string, $flags, $encoding, $double_encode);

    return $encoded;
  }
958
959
  /**
960
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
961
   *
962
   * @link http://php.net/manual/en/function.htmlspecialchars.php
963
   *
964
   * @param string $string        <p>
965
   *                              The string being converted.
966
   *                              </p>
967
   * @param int    $flags         [optional] <p>
968
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
969
   *                              invalid code unit sequences and the used document type. The default is
970
   *                              ENT_COMPAT | ENT_HTML401.
971
   *                              <table>
972
   *                              Available <i>flags</i> constants
973
   *                              <tr valign="top">
974
   *                              <td>Constant Name</td>
975
   *                              <td>Description</td>
976
   *                              </tr>
977
   *                              <tr valign="top">
978
   *                              <td><b>ENT_COMPAT</b></td>
979
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
980
   *                              </tr>
981
   *                              <tr valign="top">
982
   *                              <td><b>ENT_QUOTES</b></td>
983
   *                              <td>Will convert both double and single quotes.</td>
984
   *                              </tr>
985
   *                              <tr valign="top">
986
   *                              <td><b>ENT_NOQUOTES</b></td>
987
   *                              <td>Will leave both double and single quotes unconverted.</td>
988
   *                              </tr>
989
   *                              <tr valign="top">
990
   *                              <td><b>ENT_IGNORE</b></td>
991
   *                              <td>
992
   *                              Silently discard invalid code unit sequences instead of returning
993
   *                              an empty string. Using this flag is discouraged as it
994
   *                              may have security implications.
995
   *                              </td>
996
   *                              </tr>
997
   *                              <tr valign="top">
998
   *                              <td><b>ENT_SUBSTITUTE</b></td>
999
   *                              <td>
1000
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1001
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
1002
   *                              </td>
1003
   *                              </tr>
1004
   *                              <tr valign="top">
1005
   *                              <td><b>ENT_DISALLOWED</b></td>
1006
   *                              <td>
1007
   *                              Replace invalid code points for the given document type with a
1008
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
1009
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1010
   *                              instance, to ensure the well-formedness of XML documents with
1011
   *                              embedded external content.
1012
   *                              </td>
1013
   *                              </tr>
1014
   *                              <tr valign="top">
1015
   *                              <td><b>ENT_HTML401</b></td>
1016
   *                              <td>
1017
   *                              Handle code as HTML 4.01.
1018
   *                              </td>
1019
   *                              </tr>
1020
   *                              <tr valign="top">
1021
   *                              <td><b>ENT_XML1</b></td>
1022
   *                              <td>
1023
   *                              Handle code as XML 1.
1024
   *                              </td>
1025
   *                              </tr>
1026
   *                              <tr valign="top">
1027
   *                              <td><b>ENT_XHTML</b></td>
1028
   *                              <td>
1029
   *                              Handle code as XHTML.
1030
   *                              </td>
1031
   *                              </tr>
1032
   *                              <tr valign="top">
1033
   *                              <td><b>ENT_HTML5</b></td>
1034
   *                              <td>
1035
   *                              Handle code as HTML 5.
1036
   *                              </td>
1037
   *                              </tr>
1038
   *                              </table>
1039
   *                              </p>
1040
   * @param string $encoding      [optional] <p>
1041
   *                              Defines encoding used in conversion.
1042
   *                              </p>
1043
   *                              <p>
1044
   *                              For the purposes of this function, the encodings
1045
   *                              ISO-8859-1, ISO-8859-15,
1046
   *                              UTF-8, cp866,
1047
   *                              cp1251, cp1252, and
1048
   *                              KOI8-R are effectively equivalent, provided the
1049
   *                              <i>string</i> itself is valid for the encoding, as
1050
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1051
   *                              the same positions in all of these encodings.
1052
   *                              </p>
1053
   * @param bool   $double_encode [optional] <p>
1054
   *                              When <i>double_encode</i> is turned off PHP will not
1055
   *                              encode existing html entities, the default is to convert everything.
1056
   *                              </p>
1057
   *
1058 1
   * @return string The converted string.
1059
   * </p>
1060 1
   * <p>
1061
   * If the input <i>string</i> contains an invalid code unit
1062
   * sequence within the given <i>encoding</i> an empty string
1063
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1064
   * <b>ENT_SUBSTITUTE</b> flags are set.
1065
   */
1066
  public static function htmlspecialchars($string, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Delegates to PHP's native htmlspecialchars(); this wrapper only fixes the
    // default values of $flags (ENT_COMPAT) and $encoding (UTF-8).
    $converted = htmlspecialchars($string, $flags, $encoding, $double_encode);

    return $converted;
  }
1070 16
1071
  /**
1072 16
   * alias for "UTF8::is_utf8"
1073
   *
1074
   * @param string $str
1075
   *
1076
   * @return bool
1077
   */
1078
  public static function isUtf8($str)
  {
    // camelCase alias: the validation itself is done by is_utf8()
    $isValid = self::is_utf8($str);

    return $isValid;
  }
1082
1083
  /**
1084
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
1085
   *
1086
   * @see    http://hsivonen.iki.fi/php-utf8/
1087 31
   *
1088
   * @param    string $str The string to be checked.
1089 31
   *
1090
   * @return   bool
1091 31
   */
1092 3
  public static function is_utf8($str)
  {
    $str = (string)$str;

    // An empty string contains no invalid sequence.
    if (!isset($str[0])) {
      return true;
    }

    // The string is validated octet by octet with a small state machine.
    // The former preg_match('/^.{1}/us') shortcut was removed: it was only taken when
    // PCRE reported *no* UTF-8 support, i.e. exactly when the /u pattern cannot work.

    $pending = 0;   // continuation octets still expected for the current character
    $codePoint = 0; // code point assembled so far
    $seqLen = 1;    // total number of octets of the current character
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($pending === 0) {
        // Start of a character: either US-ASCII or the first octet of a multi-octet sequence.
        if ($in < 0x80) {
          // US-ASCII, pass straight through.
          continue;
        }

        if (0xC0 === ($in & 0xE0)) {
          // First octet of 2 octet sequence.
          $codePoint = ($in & 0x1F) << 6;
          $pending = 1;
          $seqLen = 2;
        } elseif (0xE0 === ($in & 0xF0)) {
          // First octet of 3 octet sequence.
          $codePoint = ($in & 0x0F) << 12;
          $pending = 2;
          $seqLen = 3;
        } elseif (0xF0 === ($in & 0xF8)) {
          // First octet of 4 octet sequence.
          $codePoint = ($in & 0x07) << 18;
          $pending = 3;
          $seqLen = 4;
        } else {
          // Not a legal first octet: a stray continuation octet (0x80-0xBF), the lead of a
          // 5 or 6 octet sequence (always non-shortest form or outside 0-0x10FFFF),
          // or 0xFE/0xFF.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only continuation octets (10xxxxxx) are legal.
      if (0x80 !== ($in & 0xC0)) {
        return false;
      }

      // Each continuation octet contributes 6 bits, most significant group first.
      $pending--;
      $codePoint |= ($in & 0x3F) << ($pending * 6);

      if ($pending === 0) {
        // End of the sequence: $codePoint now holds the complete code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $seqLen && $codePoint < 0x0080) ||
            (3 === $seqLen && $codePoint < 0x0800) ||
            (4 === $seqLen && $codePoint < 0x10000) ||
            // From Unicode 3.2, surrogate characters are illegal.
            ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) ||
            // Code points outside the Unicode range are illegal.
            ($codePoint > 0x10FFFF)
        ) {
          return false;
        }
      }
    }

    // A string that ends in the middle of a multi-octet sequence is not valid UTF-8.
    return $pending === 0;
  }
1216
1217
  /**
1218 6
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
1219
   * mask.
1220 6
   *
1221
   * @param string $s
1222
   * @param string $mask
1223
   * @param int    $start
1224 6
   * @param int    $len
1225
   *
1226
   * @return int|null
1227
   */
1228
  public static function strspn($s, $mask, $start = 0, $len = 2147483647)
  {
    // Only cut the subject down when the caller actually asked for a sub-range.
    $needsSlice = $start || 2147483647 != $len;
    if ($needsSlice) {
      $s = self::substr($s, $start, $len);
    }

    // Anchor a "one or more characters from the mask" pattern at the start of the subject.
    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $s, $matches)) {
      return 0;
    }

    // The result is the length of the matched prefix, measured with self::strlen().
    return self::strlen($matches[0]);
  }
1236
1237
  /**
1238
   * Get part of a string.
1239
   *
1240
   * @link http://php.net/manual/en/function.mb-substr.php
1241
   *
1242
   * @param string  $str       <p>
1243
   *                           The string being checked.
1244
   *                           </p>
1245
   * @param int     $start     <p>
1246
   *                           The first position used in str.
1247 37
   *                           </p>
1248
   * @param int     $length    [optional] <p>
1249 37
   *                           The maximum length of the returned string.
1250
   *                           </p>
1251 37
   * @param string  $encoding
1252
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
1253 37
   *
1254 9
   * @return string mb_substr returns the portion of
1255
   * str specified by the start and length parameters.
1256
   */
1257
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Remembers, across calls, whether grapheme_substr() shows the behaviour probed below.
    static $bug62759;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    // iconv and mbstring are not tolerant to invalid encoding;
    // further, their behaviour is inconsistent with that of PHP's substr
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // No explicit length: use the full character count of the string.
    $length = ($length === null) ? (int)self::strlen($str) : (int)$length;

    if (self::$support['mbstring'] === true) {
      // INFO: this is only a fallback for old versions
      if (is_bool($encoding)) {
        $encoding = 'UTF-8';
      }

      return mb_substr($str, $start, $length, $encoding);
    }

    // NOTE(review): this branch is gated on the 'iconv' flag but calls grapheme_* functions;
    // confirm that this is the intended flag.
    if (self::$support['iconv'] === true) {
      if (!isset($bug62759)) {
        $bug62759 = ('à' === grapheme_substr('éà', 1, -2));
      }

      return $bug62759
          ? (string)Intl::grapheme_substr_workaround62759($str, $start, $length)
          : (string)grapheme_substr($str, $start, $length);
    }

    // fallback: split into characters, take the requested slice and join it into a string again
    $characters = self::split($str);

    return implode(array_slice($characters, $start, $length));
  }
1314
1315
  /**
1316
   * Get the string length, not the byte-length!
1317
   *
1318
   * @link     http://php.net/manual/en/function.mb-strlen.php
1319 63
   *
1320
   * @param string  $string    The string being checked for length.
1321 63
   * @param string  $encoding  Set the charset for e.g. "mb_" function
1322
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
1323 63
   *
1324 4
   * @return int the number of characters in
1325
   *           string str having character encoding
1326
   *           encoding. A multi-byte character is
1327
   *           counted as 1.
1328 62
   */
1329
  public static function strlen($string, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions, where a boolean was passed in this position
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    // Cleaning is only applied to UTF-8 input, and only on request.
    $subject = ($encoding === 'UTF-8' && $cleanUtf8 === true) ? self::clean($string) : $string;

    return mb_strlen($subject, $encoding);
  }
1353 24
1354
  /**
1355 24
   * Convert a string to an array of Unicode characters.
1356
   *
1357 24
   * @param    string  $str       The string to split into array.
1358 5
   * @param    int     $length    Max character length of each array element.
1359
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string.
1360
   *
1361
   * @return   array An array containing chunks of the string.
1362 23
   */
1363 23
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // One match per character; "s" lets the dot match newlines as well.
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes by hand and keep only sequences whose lead byte
      // and continuation bytes fit together; anything else is dropped

      $byteCount = strlen($str);
      $pos = 0;

      while ($pos < $byteCount) {
        $lead = $str[$pos];

        // single-byte (ASCII) character
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;
          $pos++;
          continue;
        }

        // number of continuation bytes announced by the lead byte (0 = not a usable lead byte)
        if (($lead & "\xE0") === "\xC0") {
          $extra = 1;
        } elseif (($lead & "\xF0") === "\xE0") {
          $extra = 2;
        } elseif (($lead & "\xF8") === "\xF0") {
          $extra = 3;
        } else {
          $extra = 0;
        }

        if ($extra > 0 && isset($str[$pos + $extra])) {
          // every announced byte must be a continuation byte (10xxxxxx)
          $wellFormed = true;
          for ($k = 1; $k <= $extra; $k++) {
            if (($str[$pos + $k] & "\xC0") !== "\x80") {
              $wellFormed = false;
              break;
            }
          }

          if ($wellFormed) {
            $chars[] = substr($str, $pos, $extra + 1);
            $pos += $extra;
          }
        }

        $pos++;
      }
    }

    // Group the characters into chunks of $length and glue each chunk back into a string.
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
1432 40
1433
  /**
   * Build (and cache) a regex fragment that matches any one of the graphemes in $s.
   *
   * Short graphemes are collected in a bracket expression; longer grapheme
   * clusters are added as alternatives, giving "(?:[...]|cluster|...)".
   *
   * @param string $s     The graphemes to match.
   * @param string $class Initial content of the bracket expression.
   *
   * @return string The regex fragment (quoted for the "/" delimiter).
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $s = (string)$s;
    $class = (string)$class;

    // Cache per ($class, $s) pair: one concatenated key would mix up
    // e.g. ("a^", "") and ("a", "^") and hand back the wrong pattern.
    if (isset($rxClassCache[$class][$s])) {
      return $rxClassCache[$class][$s];
    }

    $bracket = $class;
    $clusters = array();

    foreach (self::str_split($s) as $grapheme) {
      if ('-' === $grapheme) {
        // keep "-" at the very start of the bracket expression
        $bracket = '-' . $bracket;
      } elseif (!isset($grapheme[2])) {
        // at most two bytes long: quote regex meta characters
        $bracket .= preg_quote($grapheme, '/');
      } elseif (1 === self::strlen($grapheme)) {
        // one multi-byte character
        $bracket .= $grapheme;
      } else {
        // grapheme cluster made of several characters: needs its own alternative
        $clusters[] = $grapheme;
      }
    }

    $bracket = '[' . $bracket . ']';

    if (0 === count($clusters)) {
      $return = $bracket;
    } else {
      $return = '(?:' . implode('|', array_merge(array($bracket), $clusters)) . ')';
    }

    $rxClassCache[$class][$s] = $return;

    return $return;
  }
1477
1478 17
  /**
   * Convert a string to an array of graphemes (or of chunks of $len graphemes).
   *
   * @param string $string The input string.
   * @param int    $len    Number of graphemes per array element; values below 1
   *                       are handed to the native str_split().
   *
   * @return array
   */
  public static function str_split($string, $len = 1)
  {
    // init
    self::checkForSupport();

    if (1 > $len = (int)$len) {
      // invalid chunk length: re-read the argument and let the native function deal with it
      $len = func_get_arg(1);

      return str_split($string, $len);
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $offset = 0;
      $byteLength = strlen($string);
      while ($offset < $byteLength) {
        $start = $offset;
        $grapheme = grapheme_extract($string, 1, GRAPHEME_EXTR_COUNT, $start, $offset);

        // grapheme_extract() returns false on failure; without this guard the
        // offset would never advance and the loop would run forever.
        if ($grapheme === false || $offset <= $start) {
          break;
        }

        $graphemes[] = $grapheme;
      }
    } else {
      preg_match_all('/' . GRAPHEME_CLUSTER_RX . '/u', $string, $matches);
      $graphemes = $matches[0];
    }

    if (1 === $len) {
      return $graphemes;
    }

    // glue every $len consecutive graphemes together
    return array_map('implode', array_chunk($graphemes, $len));
  }
1527 1
1528
  /**
   * Get the display width of a UTF-8 string, as reported by mb_strwidth().
   *
   * @param string $s
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // make sure the needed extensions / shims are available
    self::checkForSupport();

    $width = mb_strwidth($s, 'UTF-8');

    return $width;
  }
1542 5
1543
  /**
   * Count the characters at the start of $str that are not contained in $charlist.
   *
   * @param string $str      The string to examine.
   * @param string $charlist The characters to stop at.
   * @param int    $start    Start position of the examined part.
   * @param int    $len      Length of the examined part.
   *
   * @return int|null Null when $charlist is empty.
   */
  public static function strcspn($str, $charlist, $start = 0, $len = 2147483647)
  {
    $charlist .= '';
    if ('' === $charlist) {
      return null;
    }

    // only cut the string when a start or a length was actually requested
    $str = ($start || 2147483647 != $len) ? (string)self::substr($str, $start, $len) : (string)$str;

    $matches = array();
    if (!preg_match('/^(.*?)' . self::rxClass($charlist) . '/us', $str, $matches)) {
      // none of the characters occurs: the whole string counts
      return self::strlen($str);
    }

    // $matches[1] is everything in front of the first character from $charlist
    return self::strlen($matches[1]);
  }
1572 1
1573
  /**
   * Check that a string has no more Unicode characters than the given limit.
   *
   * @param string $str      The string to be checked.
   * @param int    $box_size The maximum number of characters.
   *
   * @return bool True if the length of the string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1586 7
1587
  /**
   * Case-insensitive search for the first occurrence of $needle via mb_stristr().
   *
   * @param string $string        The string to search in.
   * @param string $needle        The string to search for.
   * @param bool   $before_needle Passed through to mb_stristr().
   *
   * @return false|string False when $needle is empty, otherwise the result of mb_stristr().
   */
  public static function stristr($string, $needle, $before_needle = false)
  {
    $needle .= '';
    if ('' === $needle) {
      return false;
    }

    // make sure the needed extensions / shims are available
    self::checkForSupport();

    $found = mb_stristr($string, $needle, $before_needle, 'UTF-8');

    return $found;
  }
1607 1
1608
  /**
   * Case-insensitive "natural order" comparison: both strings are case-folded and then given to strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2, > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
1621 2
1622
  /**
   * "Natural order" comparison of two strings after folding them with strtonatfold().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2, > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    // identical strings need no folding at all
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
1635 3
1636
  /**
   * Case-sensitive folding for collation matching: normalize to NFD and remove all
   * non-spacing marks (\p{Mn}).
   *
   * @param string $s
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = Normalizer::normalize($s, Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
1647
1648 10
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $string The input string.
   * @param bool   $full   Also apply the "caseFolding_full" replacement table.
   *
   * @return string The folded, lowercased string.
   */
  public static function strtocasefold($string, $full = true)
  {
    // the lookup tables are prepared once and reused by later calls
    static $fullCaseFold = null;
    static $commonCaseFoldKeys = null;
    static $commonCaseFoldValues = null;

    if (null === $commonCaseFoldKeys) {
      $commonCaseFoldKeys = array_keys(self::$commonCaseFold);
      $commonCaseFoldValues = array_values(self::$commonCaseFold);
    }

    $string = str_replace($commonCaseFoldKeys, $commonCaseFoldValues, $string);

    if (!$full) {
      return self::strtolower($string);
    }

    if (null === $fullCaseFold) {
      $fullCaseFold = self::getData('caseFolding_full');
    }

    // index 0 holds the search strings, index 1 their replacements
    /** @noinspection OffsetOperationsInspection */
    $string = str_replace($fullCaseFold[0], $fullCaseFold[1], $string);

    return self::strtolower($string);
  }
1683
1684
  /**
   * Make a string lowercase via mb_strtolower().
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      The string being lowercased.
   * @param string $encoding The encoding handed to mb_strtolower().
   *
   * @return string The lowercased string; an empty string for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to do for an empty string
    if ('' === $str) {
      return '';
    }

    // make sure the needed extensions / shims are available
    self::checkForSupport();

    $lowered = mb_strtolower($str, $encoding);

    return $lowered;
  }
1710 1
1711
  /**
   * URL-decode a string and repair url-encoded win1252 characters.
   *
   * Steps: urldecode(), "%uXXXX" to "&#xXXXX;", to_utf8(), html_entity_decode(),
   * rawurldecode() and finally fix_simple_utf8().
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // turn "%uXXXX" sequences into hexadecimal HTML entities, which are decoded below
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', urldecode($str));

    // ENT_HTML5 is only used when Bootup::is_php('5.4') reports support
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    $asUtf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($asUtf8, $flags);
    $rawDecoded = rawurldecode($withoutEntities);

    return (string)self::fix_simple_utf8($rawDecoded);
  }
1741 8
1742
  /**
   * Fix a broken UTF-8 string by replacing every key of self::$brokenUtf8ToUtf8 with its value.
   *
   * @param string $str
   *
   * @return string
   */
  public static function fix_simple_utf8($str)
  {
    // search / replace lists, built on the first call only
    static $brokenUtf8ToUtf8Keys = null;
    static $brokenUtf8ToUtf8Values = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if (null === $brokenUtf8ToUtf8Keys) {
      $brokenUtf8ToUtf8Keys = array_keys(self::$brokenUtf8ToUtf8);
      $brokenUtf8ToUtf8Values = array_values(self::$brokenUtf8ToUtf8);
    }

    $fixed = str_replace($brokenUtf8ToUtf8Keys, $brokenUtf8ToUtf8Values, $str);

    return $fixed;
  }
1767
1768
  /**
   * UTF-8 version of html_entity_decode(): convert all HTML entities to their applicable characters.
   *
   * The native html_entity_decode() does not convert entities that lack the
   * trailing semicolon, although most browsers still interpret them. Therefore
   * the missing semicolon is added to numeric entities before decoding, and the
   * decoding is repeated until the string does not change anymore.
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $string   The input string.
   * @param int    $flags    [optional] Bitmask that specifies how to handle quotes and which document
   *                         type to use. When null, ENT_COMPAT is used (plus ENT_HTML5 if
   *                         Bootup::is_php('5.4') is true).
   *                         - ENT_COMPAT:   convert double-quotes and leave single-quotes alone
   *                         - ENT_QUOTES:   convert both double and single quotes
   *                         - ENT_NOQUOTES: leave both double and single quotes unconverted
   *                         - ENT_HTML401:  handle code as HTML 4.01
   *                         - ENT_XML1:     handle code as XML 1
   *                         - ENT_XHTML:    handle code as XHTML
   *                         - ENT_HTML5:    handle code as HTML 5
   * @param string $encoding [optional] Encoding to use.
   *
   * @return string The decoded string.
   */
  public static function html_entity_decode($string, $flags = null, $encoding = 'UTF-8')
  {
    $string = (string)$string;

    if ('' === $string) {
      return '';
    }

    // without an ampersand there cannot be any entity
    if (false === strpos($string, '&')) {
      return $string;
    }

    if (null === $flags) {
      $flags = ENT_COMPAT;
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_COMPAT | ENT_HTML5;
      }
    }

    do {
      $previous = $string;

      // add the missing semicolon to numeric (decimal and hexadecimal) entities ...
      $completed = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $string);

      // ... and decode them
      $string = html_entity_decode($completed, $flags, $encoding);
    } while ($previous !== $string);

    return $string;
  }
1870
1871
  /**
   * Convert a string (or, recursively, every value of an array) to UTF-8.
   *
   * Byte sequences that already look like UTF-8 are left alone; every other
   * byte >= 0x80 is treated as Windows-1252 / ISO 8859-1 and re-encoded.
   * Afterwards "\uXXXX" escape sequences and decimal entities with 2-4 digits
   * (e.g. "&#228;") are decoded as well.
   *
   * The guess is made byte by byte, so single-byte text that happens to look
   * like UTF-8 is NOT converted:
   *
   * 1) a byte 0xC0-0xDF (À ... ß) followed by one byte 0x80-0xBF,
   *    e.g. "%ABREPRESENT%C9%BB" («REPRESENTÉ»): the "«" (%AB) is converted, but
   *    "É" followed by "»" (%C9%BB) is also a valid UTF-8 sequence and stays unchanged
   * 2) a byte 0xE0-0xEF (à ... ï) followed by two bytes 0x80-0xBF
   * 3) a byte 0xF0-0xF7 (ð ... ÷) followed by three bytes 0x80-0xBF
   *
   * @name               to_utf8
   *
   * @param string|array $string Any string or array.
   *
   * @return string|array The same string (or array), but UTF-8 encoded.
   */
  public static function to_utf8($string)
  {
    if (is_array($string)) {
      foreach ($string as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $string[$k] = self::to_utf8($v);
      }

      return $string;
    }

    $string = (string)$string;

    if (!isset($string[0])) {
      return $string;
    }

    $max = self::strlen($string, '8bit');

    $buf = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $string[$i];
      $ordC1 = ord($c1);

      if ($ordC1 < 0x80) { // ASCII - it doesn't need conversion
        $buf .= $c1;
        continue;
      }

      if ($ordC1 >= 0xc0) { // should be converted to UTF8, if it's not UTF8 already

        // number of bytes the sequence must have, judged by its first byte
        if ($ordC1 <= 0xdf) {
          $sequenceLength = 2;
        } elseif ($ordC1 <= 0xef) {
          $sequenceLength = 3;
        } elseif ($ordC1 <= 0xf7) {
          $sequenceLength = 4;
        } else {
          $sequenceLength = 0; // doesn't look like UTF8 at all
        }

        // it is only UTF8 if all following bytes exist and are continuation bytes (0x80-0xBF)
        $sequence = $c1;
        $isUtf8 = ($sequenceLength > 0 && $i + $sequenceLength <= $max);
        for ($j = 1; $isUtf8 && $j < $sequenceLength; $j++) {
          $next = $string[$i + $j];
          $isUtf8 = ((ord($next) & 0xc0) === 0x80);
          $sequence .= $next;
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $sequenceLength - 1;
          continue;
        }

      } elseif (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
        $buf .= self::$win1252ToUtf8[$ordC1];
        continue;
      }

      // not valid UTF8 - convert the single byte into a two byte sequence
      // (integer math only: chr() must not be fed with a fractional float)
      $buf .= chr(0xc0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3f));
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode decimal HTML entities
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
2001
2002
  /**
   * Alias for "UTF8::to_utf8".
   *
   * @param string $string
   *
   * @return string
   */
  public static function toUTF8($string)
  {
    $converted = self::to_utf8($string);

    return $converted;
  }
2013
2014
  /**
   * Try to check if a string is a json-string: it has to decode to an object without a json error.
   *
   * @param $string
   *
   * @return bool
   *
   * @deprecated
   */
  public static function isJson($string)
  {
    $string = (string)$string;

    if ('' === $string) {
      return false;
    }

    $decoded = json_decode($string);

    return is_object($decoded) && json_last_error() == JSON_ERROR_NONE;
  }
2041
2042
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      The input string. Must be valid UTF-8.
   * @param string $needle        The string to look for. Must be valid UTF-8.
   * @param bool   $before_needle [optional] If true, the part of the haystack before the first
   *                              occurrence of the needle (excluding the needle) is returned.
   *
   * @return string|false The portion of the string, or false if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // make sure the needed extensions / shims are available
    self::checkForSupport();

    $portion = grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
2066
2067
  /**
   * Reads an entire file into a string and (optionally) converts the content to clean UTF-8.
   *
   * WARNING: do not use the UTF-8 option for binary files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string   $filename      Name of the file to read.
   * @param int      $flags         [optional] Handed to the native function as second argument,
   *                                e.g. FILE_USE_INCLUDE_PATH to search for the file in the include path.
   * @param resource $context       [optional] A valid context resource created with stream_context_create().
   *                                When null (and $timeout is not 0), a context with the http timeout is created.
   * @param int      $offset        [optional] The offset where the reading starts.
   * @param int      $maxlen        [optional] Maximum length of data read. The default is to read until
   *                                the end of file is reached.
   * @param int      $timeout       The http timeout in seconds, only used when no $context was given.
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because
   *                                they use non default utf-8 chars.
   *
   * @return string|false The read data, or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm before upgrading
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // the native function expects bool / int here, so do not hand over the null defaults
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // the detection returns false for an unknown encoding,
      // which must not be used as source encoding for the conversion
      $encoding = self::str_detect_encoding($data);
      if ($encoding !== false && $encoding !== 'UTF-8') {
        $data = mb_convert_encoding($data, 'UTF-8', $encoding);
      }

      // clean utf-8 string
      $data = self::cleanup($data);
    }

    return $data;
  }
2191 2
2192
  /**
2193
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
2194 2
   *
2195
   * @param string $str
2196
   *
2197
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
2198 2
   *                      otherwise it will return false.
2199
   */
2200
  public static function str_detect_encoding($str)
2201
  {
2202
    // init
2203
    $encoding = '';
2204 2
2205
    // UTF-8
2206
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
2207
    /** @noinspection SubStrUsedAsStrPosInspection */
2208
    if (substr($str, 0, 3) == @pack('CCC', 0xef, 0xbb, 0xbf)) {
2209
      return 'UTF-8';
2210 2
    }
2211
2212
    // UTF-16 (BE)
0 ignored issues
show
Unused Code Comprehensibility introduced by
38% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
2213
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
2214
    /** @noinspection SubStrUsedAsStrPosInspection */
2215
    if (substr($str, 0, 2) == @pack('CC', 0xfe, 0xff)) {
2216 2
      return 'UTF-16BE';
2217
    }
2218
2219
    // UTF-16 (LE)
0 ignored issues
show
Unused Code Comprehensibility introduced by
38% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
2220
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
2221
    /** @noinspection SubStrUsedAsStrPosInspection */
2222 2
    if (substr($str, 0, 2) == @pack('CC', 0xff, 0xfe)) {
2223
      return 'UTF-16LE';
2224
    }
2225
2226 2
    // UTF-32 (BE)
0 ignored issues
show
Unused Code Comprehensibility introduced by
38% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
2227 2
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
2228
    /** @noinspection SubStrUsedAsStrPosInspection */
2229
    if (substr($str, 0, 4) == @pack('CC', 0x00, 0x00, 0xfe, 0xff)) {
2230
      return 'UTF-32BE';
2231 2
    }
2232 2
2233 2
    // UTF-32 (LE)
0 ignored issues
show
Unused Code Comprehensibility introduced by
38% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
2234 2
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
2235 2
    /** @noinspection SubStrUsedAsStrPosInspection */
2236 2
    if (substr($str, 0, 4) == @pack('CC', 0xff, 0xfe, 0x00, 0x00)) {
2237
      return 'UTF32LE';
2238 2
    }
2239 1
2240 1
    if (!$encoding) {
2241 1
      self::checkForSupport();
2242 1
2243 1
      // For UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always.
2244
      $detectOrder = array(
2245 1
          'UTF-8',
2246
          'windows-1251',
2247
          'ISO-8859-1',
2248 1
      );
2249
      $encoding = mb_detect_encoding($str, $detectOrder, true);
2250 2
    }
2251
2252
    if (self::is_binary($str)) {
2253
      if (self::is_utf16($str) == 1) {
2254 2
        return 'UTF-16LE';
2255
      } elseif (self::is_utf16($str) == 2) {
2256
        return 'UTF-16BE';
2257
      } elseif (self::is_utf32($str) == 1) {
2258
        return 'UTF-32LE';
2259
      } elseif (self::is_utf32($str) == 2) {
2260
        return 'UTF-32BE';
2261
      }
2262
    }
2263
2264 3
    if (!$encoding) {
2265
      $encoding = false;
2266
    }
2267 3
2268
    return $encoding;
2269
  }
2270 3
2271
  /**
   * Heuristically check whether the input looks like binary data.
   *
   * The input is reported as binary when it either consists solely of the
   * characters "0" and "1" (a bit-string), or contains at least one NUL byte.
   *
   * @param string $input
   *
   * @return bool true if one of the heuristics above matches, false otherwise.
   */
  public static function is_binary($input)
  {
    // a string made up only of "0" / "1" characters is treated as a bit-string
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // NOTE: a former third check,
    //   substr_count($input, '^ -~') / strlen($input) > 0.3
    // was removed. It searched for the literal 4-byte text '^ -~' (not a
    // character class), so the ratio could never exceed 0.25 and the check
    // was always false.
    return substr_count($input, "\x00") > 0;
  }
2295 1
2296 1
  /**
   * Check if the string is UTF-16.
   *
   * @param string $string
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($string)
  {
    return self::detectByteOrderByRoundTrip($string, 'UTF-16LE', 'UTF-16BE');
  }

  /**
   * Returns count of characters used in a string.
   *
   * Unlike the native count_chars() there is no $mode parameter.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array of Character as keys and
   *           their count as values, sorted by key.
   */
  public static function count_chars($str)
  {
    $charCounts = array_count_values(self::split($str));

    ksort($charCounts);

    return $charCounts;
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $string
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($string)
  {
    return self::detectByteOrderByRoundTrip($string, 'UTF-32LE', 'UTF-32BE');
  }

  /**
   * Shared implementation of is_utf16() / is_utf32(): decide between the
   * little-endian and big-endian variant of an encoding family.
   *
   * Only input that is_binary() reports as binary is examined. Both candidate
   * encodings are scored with roundTripScore(); the higher score wins and a
   * tie (including 0 : 0) means "not detected".
   *
   * @param string $string               The raw input.
   * @param string $littleEndianEncoding e.g. 'UTF-16LE'
   * @param string $bigEndianEncoding    e.g. 'UTF-16BE'
   *
   * @return int|false 1 for little-endian, 2 for big-endian, false if undecided.
   */
  private static function detectByteOrderByRoundTrip($string, $littleEndianEncoding, $bigEndianEncoding)
  {
    if (!self::is_binary($string)) {
      return false;
    }

    self::checkForSupport();

    $littleEndianScore = self::roundTripScore($string, $littleEndianEncoding);
    $bigEndianScore = self::roundTripScore($string, $bigEndianEncoding);

    if ($littleEndianScore > $bigEndianScore) {
      return 1;
    }

    if ($bigEndianScore > $littleEndianScore) {
      return 2;
    }

    return false;
  }

  /**
   * Score how plausible it is that $string is encoded in $encoding.
   *
   * The input is converted to UTF-8, back to $encoding and to UTF-8 again.
   * If the conversion fails, yields at most one byte, or is not stable
   * across the round trip, the score is 0.
   *
   * @param string $string   The raw input.
   * @param string $encoding The candidate source encoding.
   *
   * @return int
   */
  private static function roundTripScore($string, $encoding)
  {
    $asUtf8 = mb_convert_encoding($string, 'UTF-8', $encoding);
    if ($asUtf8 === false || strlen($asUtf8) <= 1) {
      return 0;
    }

    $backToSource = mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
    $asUtf8Again = mb_convert_encoding($backToSource, 'UTF-8', $encoding);

    // strict comparison: with "==" two different numeric-looking strings
    // (e.g. "1e3" and "1000") would be considered equal
    if ($asUtf8Again !== $asUtf8) {
      return 0;
    }

    $stringChars = self::count_chars($string);

    $score = 0;
    foreach (self::count_chars($asUtf8Again) as $char => $unusedCount) {
      // NOTE(review): in_array() searches the *values* of $stringChars (the
      // occurrence counts), not its keys (the characters). Kept unchanged to
      // preserve the existing heuristic - confirm whether a key lookup was intended.
      if (in_array($char, $stringChars, true) === true) {
        $score++;
      }
    }

    return $score;
  }
2422
2423
  /**
   * Clean up a string so that only printable UTF-8 characters remain.
   *
   * @param string|false $string The input; it is cast to string first.
   *
   * @return string The cleaned string, or '' for empty input.
   */
  public static function cleanup($string)
  {
    $input = (string)$string;

    // nothing to clean
    if (!isset($input[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    // fix simple ISO <-> UTF-8 errors
    $repaired = self::fix_simple_utf8($input);

    // Per the original author's notes, this clean() call is meant to:
    // - remove all non UTF-8 symbols
    // - remove the diamond question mark (�)
    // - remove invisible characters (e.g. "\0")
    // - remove the BOM
    // - normalize whitespace chars
    $cleaned = self::clean($repaired, true, true, false, true);

    return (string)$cleaned;
  }
2453
2454
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to is_binary().
   * If the file cannot be opened or read, an empty block is tested instead.
   *
   * @param string $file Path of the file to inspect.
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // fopen() reports failure by returning false (plus a warning), it does
      // not throw - so the handle must be checked before it is used
      if ($fp !== false) {
        $data = fread($fp, 512);
        fclose($fp);

        // fread() also returns false on failure
        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // only reached when an error handler turns warnings into exceptions
      $block = '';
    }

    return self::is_binary($block);
  }
2473
2474
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Wrapper around mb_strrchr() that calls checkForSupport() first.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] If true, return all of haystack from the beginning
   *                         to the last occurrence of needle. If false (default), return
   *                         all of haystack from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name to use, 'UTF-8' by default.
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
2508
2509
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Wrapper around mb_strrichr() that calls checkForSupport() first.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] If true, return all of haystack from the beginning
   *                         to the last occurrence of needle. If false (default), return
   *                         all of haystack from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name to use, 'UTF-8' by default.
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
2543 1
2544 1
  /**
   * "filter_var()"-wrapper which passes the result through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * @param mixed $var
   * @param int   $filter
   * @param mixed $option Only forwarded to filter_var() when explicitly passed by the caller.
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // forward $option only if the caller really supplied a third argument
    $filtered = func_num_args() < 3
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($filtered);
  }
2563 1
2564
  /**
   * Normalizes to UTF-8 NFC, converting from CP-1252 when needed.
   *
   * Arrays and objects are processed recursively, element by element.
   * Strings get their line breaks unified to "\n" and, if they contain bytes
   * >= 0x80, are normalized via the Normalizer. Any other type is returned untouched.
   *
   * @param mixed  $var
   * @param int    $normalization_form Passed through to the Normalizer (default 4, presumably NFC).
   * @param string $leading_combining  Prefix added when the result starts with a combining mark.
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->$property = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // no byte >= 0x80 present: nothing to normalize
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker, so that the isset() check further down succeeds
      $normalized = '-';
    } else {
      $normalized = Normalizer::normalize($var, $normalization_form);

      // fall back to a plain UTF-8 conversion when normalizing yields nothing usable
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
2616 11
2617
  /**
   * Encode to UTF8 or LATIN1.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call it on a string that is already UTF-8 without messing it up.
   *
   * @param string $encodingLabel ISO-8859-1 || UTF-8 (the label is passed through normalizeEncoding() first)
   * @param string $string
   *
   * @return false|string Will return false if the normalized label is neither 'UTF-8' nor 'ISO-8859-1'.
   */
  public static function encode($encodingLabel, $string)
  {
    switch (self::normalizeEncoding($encodingLabel)) {
      case 'UTF-8':
        return self::to_utf8($string);

      case 'ISO-8859-1':
        return self::to_latin1($string);

      default:
        return false;
    }
  }
2642 11
2643 11
  /**
   * Normalize the encoding-name input.
   *
   * The label is upper-cased and stripped of every character that is not a
   * letter, digit or whitespace, then looked up in a table of known aliases.
   * Labels that are not in the table (this includes labels that still contain
   * whitespace) resolve to 'UTF-8'.
   *
   * @param string $encodingLabel e.g.: ISO, UTF8, ISO88591, WIN1252 ...
   *
   * @return string Either 'UTF-8' or 'ISO-8859-1'.
   */
  protected static function normalizeEncoding($encodingLabel)
  {
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));

    $equivalences = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
    );

    if (isset($equivalences[$lookupKey])) {
      return $equivalences[$lookupKey];
    }

    // unknown label: default to UTF-8
    return 'UTF-8';
  }
2674
2675
  /**
   * Alias for "UTF8::to_win1252()".
   *
   * @param string|array $string
   *
   * @return string|array Whatever to_win1252() returns for the given input.
   */
  public static function to_latin1($string)
  {
    $converted = self::to_win1252($string);

    return $converted;
  }
2686 1
2687 2
  /**
   * Convert a string into win1252.
   *
   * Strings are converted via utf8_decode(); arrays are converted element by
   * element (recursively, keys are kept); any other value is returned as-is.
   *
   * @param  string|array $string
   *
   * @return string|array
   */
  protected static function to_win1252($string)
  {
    if (is_string($string)) {
      return self::utf8_decode($string);
    }

    if (!is_array($string)) {
      return $string;
    }

    $converted = array();
    foreach ($string as $key => $value) {
      $converted[$key] = self::to_win1252($value);
    }

    return $converted;
  }
2710
2711
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through to_utf8(). Then the byte sequences listed
   * as keys of self::$utf8ToWin1252 are replaced by their mapped values, and the
   * result is handed to Xml::utf8_decode().
   *
   * @param string $string
   *
   * @return string '' for empty input.
   */
  public static function utf8_decode($string)
  {
    // the search / replace lists are built once and reused on later calls
    static $search = null;
    static $replace = null;

    $input = (string)$string;
    if (!isset($input[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($input);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $utf8);

    return Xml::utf8_decode($mapped);
  }
2741
2742
  /**
   * "filter_input()"-wrapper which passes the result through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param mixed  $option Only forwarded to filter_input() when explicitly passed by the caller.
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // forward $option only if the caller really supplied a fourth argument
    $value = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($value);
  }
2762 5
2763
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native utf8_encode() call, any sequence listed as a key of
   * self::$cp1252ToUtf8 is replaced by its mapped value. This step is skipped
   * when the encoded string contains no "\xC2" byte.
   *
   * @param string $string
   *
   * @return string
   */
  public static function utf8_encode($string)
  {
    // the search / replace lists are built once and reused on later calls
    static $search = null;
    static $replace = null;

    $encoded = utf8_encode($string);

    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
2789
2790
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value
   *
   * The value is passed through UTF8::filter() before it is encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource. All string data must be UTF-8 encoded.
   *                       </p>
   *                       <p>PHP implements a superset of
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
   *                       only supports these values when they are nested inside an array or an object.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
   *                       constants is described on the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only forwarded when Bootup::is_php('5.5') reports support.
   *                       </p>
   *
   * @return string|false a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // the $depth argument is only passed on if the version check succeeds
    return Bootup::is_php('5.5')
        ? json_encode($filtered, $options, $depth)
        : json_encode($filtered, $options);
  }
2838
2839
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Decodes a JSON string
   *
   * The JSON string is passed through UTF8::filter() before it is decoded.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded.
   *                        This function only works with UTF-8 encoded strings.
   *                        </p>
   *                        <p>PHP implements a superset of
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
   *                        only supports these values when they are nested inside an array or an object.
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options. Currently only
   *                        <b>JSON_BIGINT_AS_STRING</b>
   *                        is supported (default is to cast large integers as floats).
   *                        Only forwarded when Bootup::is_php('5.4') returns true.
   *                        </p>
   *
   * @return mixed the value encoded in <i>json</i> in appropriate
   * PHP type. Values true, false and
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
   * <i>json</i> cannot be decoded or if the encoded
   * data is deeper than the recursion limit.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    // the $options argument is only passed on if the version check succeeds
    if (Bootup::is_php('5.4') === true) {
      return json_decode($filtered, $assoc, $depth, $options);
    }

    return json_decode($filtered, $assoc, $depth);
  }
2887
2888
  /**
   * "filter_var_array()"-wrapper which passes the result through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * @param array $data
   * @param mixed $definition Only forwarded (together with $add_empty) when at least
   *                          two arguments were passed by the caller.
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    $filtered = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($filtered);
  }
2907
2908
  /**
   * "filter_input_array()"-wrapper which passes the result through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * @param int   $type
   * @param mixed $definition Only forwarded (together with $add_empty) when at least
   *                          two arguments were passed by the caller.
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    $filtered = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($filtered);
  }
2927
2928
  /**
   * Search a string for any of a set of characters.
   *
   * @param string $s        The string to search in.
   * @param string $charList The characters to look for (turned into a regex class via rxClass()).
   *
   * @return string|false The part of $s starting at the first matched character,
   *                      or false if the pattern does not match.
   */
  public static function strpbrk($s, $charList)
  {
    $pattern = '/' . self::rxClass($charList) . '/us';

    if (!preg_match($pattern, $s, $matches)) {
      return false;
    }

    // cut $s at the byte offset of the matched character
    $offset = strpos($s, $matches[0]);

    return substr($s, $offset);
  }
2944 8
2945
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are case-folded with strtocasefold() and then compared via strncmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
2958
2959
  /**
   * Comparison of the first n characters.
   *
   * Both strings are cut to $len characters with UTF8::substr() and then
   * compared via UTF8::strcmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
2974
2975
  /**
   * String comparison.
   *
   * Identical strings compare as equal right away; otherwise both strings are
   * normalized to NFD and compared with the native strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    // the '' concatenation casts both operands to string before the strict comparison
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $decomposed1 = Normalizer::normalize($str1, Normalizer::NFD);
    $decomposed2 = Normalizer::normalize($str2, Normalizer::NFD);

    return strcmp($decomposed1, $decomposed2);
  }
2992
2993
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest byte length among the characters of $str,
   *           0 if chr_size_list() yields no characters.
   */
  public static function max_chr_width($str)
  {
    $byteLengths = self::chr_size_list($str);

    if (count($byteLengths) === 0) {
      return 0;
    }

    return (int)max($byteLengths);
  }
3010
3011
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character,
   *                 empty array for empty input.
   */
  public static function chr_size_list($str)
  {
    // the string "0" is falsy in PHP but is a real one-character string,
    // so it must not be treated like empty input
    if (!$str && $str !== '0') {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
3031
3032 12
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * The numeric part is the code point returned by UTF8::ord().
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   *
   * @return   string The HTML numbered entity, or an empty string if $chr is falsy.
   */
  public static function single_chr_html_encode($chr)
  {
    if ($chr) {
      return '&#' . self::ord($chr) . ';';
    }

    return '';
  }
3047
3048 1
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * Only the first (up to) four bytes of the input are inspected. The lead
   * byte decides how many continuation bytes are folded into the result; if
   * the expected continuation bytes are missing, the value of the lead byte
   * itself is returned.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 on empty input.
   */
  public static function ord($s)
  {
    // the string "0" is falsy in PHP, but it is the valid character U+0030
    if (!$s && $s !== '0') {
      return 0;
    }

    // 1-based array of the first (max.) four byte values
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // 4-byte sequence
    if (0xF0 <= $lead && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // 3-byte sequence
    if (0xE0 <= $lead && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // 2-byte sequence
    if (0xC0 <= $lead && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte (or a lead byte without its continuation bytes)
    return $lead;
  }
3079
3080
  /**
   * Converts a UTF-8 string to a series of HTML Numbered Entities.
   *
   * INFO: HTML Numbered Entities like &#123;&#39;&#1740;...
   *
   * The string is split into characters and each one is passed through
   * UTF8::single_chr_html_encode(); the results are concatenated.
   *
   * @param    string $str The Unicode string to be encoded as numbered entities.
   *
   * @return   string HTML numbered entities.
   */
  public static function html_encode($str)
  {
    $entities = array();

    foreach (self::split($str) as $chr) {
      $entities[] = self::single_chr_html_encode($chr);
    }

    return implode($entities);
  }
3101
3102
  /**
   * Checks if a file starts with BOM character.
   *
   * Only the first three bytes of the file are read.
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (also if the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    // read from offset 0: a negative offset counts from the END of the
    // stream on PHP >= 7.1, which would never return the leading bytes
    $head = file_get_contents($file_path, false, null, 0, 3);

    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
3113
3114
  /**
   * Checks if the given string is exactly "UTF8 - Byte Order Mark".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * The comparison is strict, so the input must be a string that is
   * byte-identical to the value returned by UTF8::bom().
   *
   * @param    string $utf8_chr The input string.
   *
   * @return   bool True if the $utf8_chr is Byte Order Mark, False otherwise.
   */
  public static function is_bom($utf8_chr)
  {
    $bom = self::bom();

    return $utf8_chr === $bom;
  }
3127
3128
  /**
   * Returns the Byte Order Mark Character.
   *
   * @return   string Byte Order Mark (the three bytes EF BB BF)
   */
  public static function bom()
  {
    return pack('CCC', 0xEF, 0xBB, 0xBF);
  }
3137
3138
  /**
   * alias for "UTF8::is_bom"
   *
   * @param string $utf8_chr The input string.
   *
   * @return boolean Whatever UTF8::is_bom() returns for the same input.
   */
  public static function isBom($utf8_chr)
  {
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
3149
3150
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * The first three bytes of the string are handed to UTF8::is_bom().
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $head = substr($str, 0, 3);

    return self::is_bom($head);
  }
3161
3162
  /**
   * Prepends BOM character to the string and returns the whole string.
   *
   * INFO: If BOM already existed there, the Input string is returned.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that contains BOM
   */
  public static function add_bom_to_string($str)
  {
    $head = substr($str, 0, 3);

    // nothing to do if the first three bytes already are the BOM
    if (self::is_bom($head)) {
      return $str;
    }

    return self::bom() . $str;
  }
3179
3180 4
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split into characters via UTF8::split(), the resulting
   * list is shuffled with the native shuffle() and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
3195 4
3196 4
  /**
   * Wraps a string to a given number of characters.
   *
   * The method builds a one-byte-per-character "shadow" of the input
   * ("?" for a character, " " for a space, "#" for an existing break),
   * lets the native wordwrap() wrap that shadow, and then replays the
   * break positions found in the shadow onto the real characters.
   *
   * @param string $string The input string.
   * @param int    $width  The column width, passed through to the native wordwrap().
   * @param string $break  The line-break string.
   * @param bool   $cut    Passed through to the native wordwrap().
   *
   * @return false|string Returns the given string wrapped at the specified length,
   *                      false if the native wordwrap() rejects the arguments.
   */
  public static function wordwrap($string, $width = 75, $break = "\n", $cut = false)
  {
    // probe call: let the native function validate $width / $break / $cut
    if (false === wordwrap('-', $width, $break, $cut)) {
      return false;
    }

    // NOTE(review): this cast is a no-op for a value that already is a string;
    // possibly "!is_string($break)" was intended - confirm
    if (is_string($break)) {
      $break = (string)$break;
    }

    // $w is the shadow string, $chars holds the real characters / breaks
    $w = '';
    $string = explode($break, $string);
    $iLen = count($string);
    $chars = array();

    // empty input
    if (1 === $iLen && '' === $string[0]) {
      return '';
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $iLen; ++$i) {

      // every segment after the first one was preceded by a break
      if ($i) {
        $chars[] = $break;
        $w .= '#';
      }

      $c = $string[$i];
      unset($string[$i]);

      // one shadow byte per character: spaces stay spaces, everything else is "?"
      foreach (self::split($c) as $c) {
        $chars[] = $c;
        $w .= ' ' === $c ? ' ' : '?';
      }
    }

    // $j indexes into $chars, $i / $b are positions inside the wrapped shadow
    $string = '';
    $j = 0;
    $b = $i = -1;
    $w = wordwrap($w, $width, '#', $cut);

    // walk over every "#" (break) in the wrapped shadow
    while (false !== $b = self::strpos($w, '#', $b + 1)) {
      // copy the real characters up to the break position
      for (++$i; $i < $b; ++$i) {
        $string .= $chars[$j];
        unset($chars[$j++]);
      }

      // the break replaces an original break or a space at this position
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
        unset($chars[$j++]);
      }

      $string .= $break;
    }

    // append whatever is left after the last break
    return $string . implode('', $chars);
  }
3262 10
3263
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack. A non-negative integer
   *                              is treated as a code point and converted via UTF8::chr().
   *                              </p>
   * @param int        $offset    [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string     $encoding  The character encoding, 'UTF-8' by default.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle: convert it to its
    // character BEFORE the string cast, afterwards the type check can never match
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    if (self::$support['mbstring'] === true) {
      return mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // the guard checks for iconv, so the iconv function has to be used here
    // (grapheme_strpos belongs to "intl" and counts grapheme clusters)
    if (self::$support['iconv'] === true) {
      return iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // pure PHP fallback: search byte-wise, then convert the byte position
    // into a character position
    if ($offset > 0) {
      $haystack = (string)self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
3337
3338
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * An integer is used as-is; any other value is run through
   * UTF8::hex_to_int() first. The character itself is produced by decoding
   * the matching numeric HTML entity.
   *
   * @param    int $code_point The code point for which to generate a character.
   *
   * @return   string Multi-Byte character, returns empty string on failure to encode.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $i = (int)$code_point;

    if ($i !== $code_point) {
      // $code_point is not an integer, lets extract the int code point from it
      $i = (int)self::hex_to_int($code_point);

      if (!$i) {
        return '';
      }
    }

    return self::html_entity_decode('&#' . $i . ';', ENT_QUOTES);
  }
3358
3359 1
  /**
   * Converts hexadecimal U+xxxx code point representation to Integer.
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with
   * "\u" or "U+" (case-insensitive).
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // only real hex digits are allowed: with [a-z] an input like "12gh" would
    // pass the check and intval() would silently convert just the "12" part
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
3376
3377 3
  /**
   * Reverses characters order in the string.
   *
   * Works on the character list produced by UTF8::split(), not on bytes.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $characters = self::split($str);
    $reversed = array_reverse($characters);

    return implode($reversed);
  }
3388
3389 3
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point,
   *                  or an empty string if there is no character at all.
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codePoints = self::codepoints($arg);

    // the native max() must not be called with an empty list
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3404
3405
  /**
   * Accepts a string and returns an array of Unicode code points.
   *
   * A string is split into characters first; any other value is passed on
   * unchanged and is expected to be a list of characters already.
   *
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool  $u_style If True, will return code points in U+xxxx format,
   *                          default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $characters = is_string($arg) ? self::split($arg) : $arg;

    // character => integer code point
    $points = array_map(array(__CLASS__, 'ord'), $characters);

    if (!$u_style) {
      return $points;
    }

    // integer code point => "U+xxxx"
    return array_map(array(__CLASS__, 'int_to_hex'), $points);
  }
3440
3441
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the lowest code point,
   *                  or an empty string if there is no character at all.
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codePoints = self::codepoints($arg);

    // the native min() must not be called with an empty list
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3456
3457
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character
   * @param    string $pfix The prefix put in front of the hex digits, "U+" by default.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $codePoint = self::ord($chr);

    return self::int_to_hex($codePoint, $pfix);
  }
3469
3470 1
  /**
   * Converts Integer to hexadecimal U+xxxx code point representation.
   *
   * The hex value is left-padded with zeros to at least four digits.
   *
   * @param    int    $int  The integer to be converted to hexadecimal code point.
   * @param    string $pfix The prefix put in front of the hex digits, "U+" by default.
   *
   * @return   string The code point, or empty string on failure
   *                  (input does not consist of digits only).
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    if (!ctype_digit((string)$int)) {
      return '';
    }

    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
3490
3491 1
  /**
   * Get a binary representation of a specific character.
   *
   * Every byte of the input is rendered as eight binary digits and the
   * groups are concatenated.
   *
   * @param   string $string The input character.
   *
   * @return  string The binary digits, empty string for empty input.
   */
  public static function str_to_binary($string)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    // init
    $out = '';
    $max = strlen($string);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; ++$i) {
      // the input is processed byte by byte, so the native byte-wise ord() is
      // the right tool (and it handles the byte "0" correctly)
      $out .= sprintf('%08b', \ord($string[$i]));
    }

    return $out;
  }
3517
3518
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split on "word" matches with the delimiters captured, so
   * the odd indexes of the split result hold the words and the even indexes
   * hold the text between them.
   *
   * @param string $s        The input string.
   * @param int    $format   1: return the list of words,<br />
   *                         2: return the words keyed by their character offset,<br />
   *                         anything else: return the number of words.
   * @param string $charlist Additional characters passed to UTF8::rxClass() together with "\pL".
   *
   * @return array|float|string The number of words in the string, or the words (see $format)
   */
  public static function str_word_count($s, $format = 0, $charlist = '')
  {
    $wordClass = self::rxClass($charlist, '\pL');
    $parts = preg_split("/({$wordClass}+(?:[\p{Pd}’']{$wordClass}+)*)/u", $s, -1, PREG_SPLIT_DELIM_CAPTURE);
    $result = array();
    $total = count($parts);

    switch ($format) {
      case 1:
        // plain list of words
        for ($i = 1; $i < $total; $i += 2) {
          $result[] = $parts[$i];
        }
        break;

      case 2:
        self::checkForSupport();

        // words keyed by the character offset they start at
        $offset = self::strlen($parts[0]);
        for ($i = 1; $i < $total; $i += 2) {
          $result[$offset] = $parts[$i];
          $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
        }
        break;

      default:
        // only the number of words
        $result = ($total - 1) / 2;
    }

    return $result;
  }
3552
3553
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * The native function could only be used for 7-bit strings / chars, but
   * the check for ASCII (7-bit) costs more time than it would save here.
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped; without it,
   *                          separator and control characters (\pZ, \pC) are stripped.
   *
   * @return   string The trimmed string
   */
  public static function trim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    // explicit character list: delegate to ltrim() + rtrim()
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($string, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $string);
  }
3581 24
3582 2
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * WARNING: This is much slower than "rtrim()" !!!!
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped; without it,
   *                          whitespace (\s) is stripped.
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if ($string === '') {
      return '';
    }

    if (INF === $chars) {
      $class = '\s';
    } else {
      $class = self::rxClass($chars);
    }

    return preg_replace('/' . $class . '+$/u', '', $string);
  }
3604
3605 6
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than "ltrim()" !!!!
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped; without it,
   *                          whitespace (\s) is stripped.
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if ($string === '') {
      return '';
    }

    if (INF === $chars) {
      $class = '\s';
    } else {
      $class = self::rxClass($chars);
    }

    return preg_replace('/^' . $class . '+/u', '', $string);
  }
3627 1
3628 1
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $string is an array, the other arguments are expanded to lists of the
   * same size and the method is applied to every element. Otherwise string
   * and replacement are split into UTF-8 characters and spliced together.
   *
   * @param string|array   $string      The input string, or an array of input strings.
   * @param string|array   $replacement The replacement; for a string input only the
   *                                    element at index 0 of an array is used.
   * @param int|array      $start       Character offset where the replacement starts.
   * @param null|int|array $length      Number of characters to replace; for a string
   *                                    input, null replaces everything from $start on.
   *
   * @return array|string
   */
  public static function substr_replace($string, $replacement, $start, $length = null)
  {
    if (is_array($string)) {
      $num = count($string);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        // break the reference, otherwise $valueTmp stays bound to the last element
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $string, $replacement, $start, $length);
    }

    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both values into lists of UTF-8 characters
    preg_match_all('/./us', (string)$string, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // "up to the end": the character count of the input is always enough
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
3706
3707
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @param string $string The input string.
   *
   * @return string Whatever UTF8::to_latin1() returns for the same input.
   */
  public static function toLatin1($string)
  {
    $converted = self::to_latin1($string);

    return $converted;
  }
3718
3719
  /**
   * Count the number of sub-string occurrences.
   *
   * $offset and $length are measured in characters: the haystack is cut
   * down with UTF8::substr() first and the occurrences are counted inside
   * that part only.
   *
   * @param    string   $haystack The string to search in.
   * @param    string   $needle   The string to search for.
   * @param    int      $offset   The offset where to start counting.
   * @param    int|null $length   The maximum length after the specified offset to search for the substring,
   *                              null means "up to the end of the string".
   *
   * @return   int number of occurrences of $needle
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $offset = (int)$offset;

    if ($length !== null) {
      $haystack = self::substr($haystack, $offset, (int)$length);
    } elseif ($offset !== 0) {
      $haystack = self::substr($haystack, $offset);
    }

    // offset / length are already applied (character-wise) above, they must
    // not be applied a second time (byte-wise) by the native function
    return substr_count((string)$haystack, (string)$needle);
  }
3745
3746 1
  /**
   * alias for "UTF8::is_ascii()"
   *
   * @param string $str The string to check.
   *
   * @return boolean Whatever UTF8::is_ascii() returns for the same input.
   */
  public static function isAscii($str)
  {
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
3757
3758 1
  /**
   * Checks if a string is 7 bit ASCII.
   *
   * The string counts as ASCII if it contains no byte in the range 0x80 - 0xFF.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if it is ASCII<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    $hasHighByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighByte;
  }
3770 1
3771 1
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each bound is resolved to a code point: a value made of decimal digits
   * is used as integer, a value made of hex digits goes through
   * UTF8::hex_to_int(), anything else through UTF8::ord().
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters of the range, empty array if a bound is
   *                 falsy or resolves to 0.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start and end (in that order) with the same rules
    $bounds = array();
    foreach (array($var1, $var2) as $var) {
      if (ctype_digit((string)$var)) {
        $point = (int)$var;
      } elseif (ctype_xdigit($var)) {
        $point = (int)self::hex_to_int($var);
      } else {
        $point = self::ord($var);
      }

      if (!$point) {
        return array();
      }

      $bounds[] = $point;
    }

    return array_map(
        array(__CLASS__, 'chr'),
        range($bounds[0], $bounds[1])
    );
  }
3817
3818 1
  /**
   * Creates a random string of UTF-8 characters.
   *
   * The character pool is built on the first call and kept in static
   * variables. Characters are picked with mt_rand(), which the PHP manual
   * documents as not suitable for cryptographic purposes.
   *
   * @param    int $len The length of string in characters.
   *
   * @return   string String consisting of random characters,
   *                  empty string if $len is not positive.
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // the countdown loop below stops at exactly 0, which a fractional
    // length would never reach
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (empty($chars)) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // blank out everything that is neither a number nor a letter ...
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // ... and drop the blanked entries
        $chars = array_values(array_filter($chars));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for (; $len; --$len) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
3865
3866
  /**
   * alias for "UTF8::chr_map()"
   *
   * @param callable $callback The callback function.
   * @param string   $str      UTF-8 string to run callback on.
   *
   * @return array Whatever UTF8::chr_map() returns for the same input.
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
3878
3879
  /**
   * Applies callback to all characters of a string.
   *
   * The string is split with UTF8::split() and the callback is applied to
   * each resulting character via array_map().
   *
   * @param    string $callback The callback function.
   * @param    string $str      UTF-8 string to run callback on.
   *
   * @return   array The outcome of callback.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
3894 1
3895
  /**
   * Returns a single UTF-8 character from string.
   *
   * Gives "$str[1]"-like access, but per character instead of per byte.
   *
   * @param    string $string A UTF-8 string.
   * @param    int    $pos    The position of character to return.
   *
   * @return   string Single Multi-Byte character.
   */
  public static function access($string, $pos)
  {
    // a substring of length 1 starting at $pos is the requested character
    $character = self::substr($string, $pos, 1);

    return $character;
  }
3909 2
3910 2
  /**
   * Orders the characters of a string by their code points.
   *
   * The string is turned into code points with UTF8::codepoints(), optionally
   * de-duplicated, sorted, and converted back with UTF8::string().
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values, because array keys are unique
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
3935
3936 2
  /**
   * Builds a UTF-8 string out of a list of code points.
   *
   * Every element is converted with UTF8::chr() and the results are
   * concatenated without a separator.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    $characters = array_map($toChar, $array);

    return implode('', $characters);
  }
3955
3956 2
  /**
   * Removes HTML and PHP tags from a string.
   *
   * The input is first passed through UTF8::clean() and then handed to the
   * native strip_tags() function.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags listed here are not stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are always stripped; this is hardcoded
   *                               and cannot be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 before the native function sees the string
    return strip_tags(self::clean($str), $allowable_tags);
  }
3982 2
3983
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * Lengths are measured with UTF8::strlen(), and the padding is cut to size
   * with UTF8::substr(). The input is returned unchanged when $pad_length is
   * not an integer, is not positive, is smaller than the input length, or
   * when $pad_string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (is_int($pad_length) && ($pad_length > 0) && ($pad_length >= $input_length)) {
      $ps_length = self::strlen($pad_string);

      // An empty pad string cannot fill anything and would cause a division by zero below.
      if ($ps_length < 1) {
        return $input;
      }

      $diff = $pad_length - $input_length;

      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // The left side gets the smaller half, the right side the larger half.
          // The division must happen before the integer conversion: "(int)$diff / 2"
          // casts first and yields a float such as 1.5 for an odd $diff.
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $pre = self::substr($pre, 0, (int)floor($diff / 2));
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $post = self::substr($post, 0, (int)ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $input . $post;
    }

    return $input;
  }
4028
4029 1
  /**
   * Repeats a string a given number of times.
   *
   * The input is passed through UTF8::filter() before it is handed to the
   * native str_repeat() function.
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return str_repeat(self::filter($input), $multiplier);
  }
4053
4054
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly consecutive repetitions of a search string is
   * collapsed into a single occurrence. A $what value that is neither a string
   * nor an array leaves $str untouched.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        // Re-insert the captured occurrence ("$1") instead of $item itself:
        // used as a replacement string, $item would have sequences such as
        // "$0", "\1" or "\\" interpreted as back-references / escapes.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4076 1
4077
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching; null is treated as 0
   *                           </p>
   * @param string  $encoding  Encoding passed to mb_stripos(); a boolean value is
   *                           replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false Return the numeric position of the first occurrence of
   * needle in the haystack
   * string, or false if needle is not found or if haystack or needle is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    // mb_stripos() expects an integer offset; passing the default null directly
    // is deprecated as of PHP 8.1, so convert it explicitly (null becomes 0).
    return mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4124
4125
  /**
   * Repairs a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively, keys are kept).
   * For any other value, UTF8::utf8_decode() followed by UTF8::to_utf8() is
   * applied repeatedly until a pass no longer changes the value.
   *
   * @param array|string $string
   *
   * @return array|string An array when an array was passed in, otherwise the repaired value.
   */
  public static function fix_utf8($string)
  {
    if (is_array($string)) {
      $fixed = array();

      foreach ($string as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // keep decoding / re-encoding until the result is stable
    $previous = '';
    while ($previous != $string) {
      $previous = $string;
      $string = self::to_utf8(self::utf8_decode($string));
    }

    return $string;
  }
4152 14
4153
  /**
   * Alias of UTF8::ucfirst().
   *
   * @param string $str The value handed on to UTF8::ucfirst().
   *
   * @return string Whatever UTF8::ucfirst() returns for the same argument.
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
4164
4165
  /**
   * Upper-cases the first character of a string.
   *
   * The first character is taken with UTF8::substr(), converted with
   * UTF8::strtoupper() and joined with the rest of the string.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $firstUpper = self::strtoupper(self::substr($str, 0, 1));
    $remainder = self::substr($str, 1);

    return $firstUpper . $remainder;
  }
4176
4177
  /**
   * Converts a string to upper case.
   *
   * Uses mb_strtoupper() when the "mbstring" support flag is set. Otherwise
   * the string is cleaned with UTF8::clean() and converted through the
   * replacement table returned by UTF8::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding Encoding passed to mb_strtoupper().
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // cached split of the case table, only filled when the fallback is used
    static $caseTableKeys = null;
    static $caseTableValues = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return mb_strtoupper($str, $encoding);
    }

    // fallback

    if ($caseTableKeys === null) {
      $caseTable = self::case_table();
      $caseTableKeys = array_keys($caseTable);
      $caseTableValues = array_values($caseTable);
    }

    return str_replace($caseTableKeys, $caseTableValues, self::clean($str));
  }
4220
4221
  /**
4222
   * Returns an array of all lower and upper case UTF-8 encoded characters.
4223
   *
4224
   * @return   string An array with lower case chars as keys and upper chars as values.
4225
   */
4226
  protected static function case_table()
4227
  {
4228
    static $case = array(
4229
4230
      // lower => upper
4231
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
4232
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
4233
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
4234
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
4235
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
4236
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
4237
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
4238
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
4239
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
4240
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
4241
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
4242
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
4243
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
4244
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
4245
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
4246
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
4247
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
4248
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
4249
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
4250
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
4251
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
4252
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
4253
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
4254
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
4255
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
4256
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
4257
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
4258
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
4259
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
4260
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
4261
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
4262
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
4263
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
4264
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
4265
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
4266
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
4267
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
4268
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
4269
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
4270
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
4271
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
4272
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
4273
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
4274
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
4275
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
4276
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
4277
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
4278
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
4279
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
4280
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
4281
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
4282
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
4283
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
4284
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
4285
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
4286
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
4287
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
4288
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
4289
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
4290
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
4291
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
4292
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
4293
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
4294
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
4295
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
4296
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
4297
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
4298
      "\xea\x9e\x87"     => "\xea\x9e\x86",
4299
      "\xea\x9e\x85"     => "\xea\x9e\x84",
4300
      "\xea\x9e\x83"     => "\xea\x9e\x82",
4301
      "\xea\x9e\x81"     => "\xea\x9e\x80",
4302
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
4303
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
4304
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
4305
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
4306
      "\xea\x9d\xad"     => "\xea\x9d\xac",
4307
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
4308
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
4309
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
4310
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
4311
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
4312
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
4313
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
4314
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
4315
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
4316
      "\xea\x9d\x99"     => "\xea\x9d\x98",
4317
      "\xea\x9d\x97"     => "\xea\x9d\x96",
4318
      "\xea\x9d\x95"     => "\xea\x9d\x94",
4319
      "\xea\x9d\x93"     => "\xea\x9d\x92",
4320
      "\xea\x9d\x91"     => "\xea\x9d\x90",
4321
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
4322
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
4323
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
4324
      "\xea\x9d\x89"     => "\xea\x9d\x88",
4325
      "\xea\x9d\x87"     => "\xea\x9d\x86",
4326
      "\xea\x9d\x85"     => "\xea\x9d\x84",
4327
      "\xea\x9d\x83"     => "\xea\x9d\x82",
4328
      "\xea\x9d\x81"     => "\xea\x9d\x80",
4329
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
4330
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
4331
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
4332
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
4333
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
4334
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
4335
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
4336
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
4337
      "\xea\x9c\xad"     => "\xea\x9c\xac",
4338
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
4339
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
4340
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
4341
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
4342
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
4343
      "\xea\x9a\x97"     => "\xea\x9a\x96",
4344
      "\xea\x9a\x95"     => "\xea\x9a\x94",
4345
      "\xea\x9a\x93"     => "\xea\x9a\x92",
4346
      "\xea\x9a\x91"     => "\xea\x9a\x90",
4347
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
4348
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
4349
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
4350
      "\xea\x9a\x89"     => "\xea\x9a\x88",
4351
      "\xea\x9a\x87"     => "\xea\x9a\x86",
4352
      "\xea\x9a\x85"     => "\xea\x9a\x84",
4353
      "\xea\x9a\x83"     => "\xea\x9a\x82",
4354
      "\xea\x9a\x81"     => "\xea\x9a\x80",
4355
      "\xea\x99\xad"     => "\xea\x99\xac",
4356
      "\xea\x99\xab"     => "\xea\x99\xaa",
4357
      "\xea\x99\xa9"     => "\xea\x99\xa8",
4358
      "\xea\x99\xa7"     => "\xea\x99\xa6",
4359
      "\xea\x99\xa5"     => "\xea\x99\xa4",
4360
      "\xea\x99\xa3"     => "\xea\x99\xa2",
4361
      "\xea\x99\x9f"     => "\xea\x99\x9e",
4362
      "\xea\x99\x9d"     => "\xea\x99\x9c",
4363
      "\xea\x99\x9b"     => "\xea\x99\x9a",
4364
      "\xea\x99\x99"     => "\xea\x99\x98",
4365
      "\xea\x99\x97"     => "\xea\x99\x96",
4366
      "\xea\x99\x95"     => "\xea\x99\x94",
4367
      "\xea\x99\x93"     => "\xea\x99\x92",
4368
      "\xea\x99\x91"     => "\xea\x99\x90",
4369
      "\xea\x99\x8f"     => "\xea\x99\x8e",
4370
      "\xea\x99\x8d"     => "\xea\x99\x8c",
4371
      "\xea\x99\x8b"     => "\xea\x99\x8a",
4372
      "\xea\x99\x89"     => "\xea\x99\x88",
4373
      "\xea\x99\x87"     => "\xea\x99\x86",
4374
      "\xea\x99\x85"     => "\xea\x99\x84",
4375
      "\xea\x99\x83"     => "\xea\x99\x82",
4376
      "\xea\x99\x81"     => "\xea\x99\x80",
4377
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
4378
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
4379
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
4380
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
4381
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
4382
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
4383
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
4384
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
4385
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
4386
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
4387
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
4388
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
4389
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
4390
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
4391
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
4392
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
4393
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
4394
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
4395
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
4396
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
4397
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
4398
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
4399
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
4400
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
4401
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
4402
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
4403
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
4404
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
4405
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
4406
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
4407
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
4408
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
4409
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
4410
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
4411
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
4412
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
4413
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
4414
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
4415
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
4416
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
4417
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
4418
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
4419
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
4420
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
4421
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
4422
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
4423
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
4424
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
4425
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
4426
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
4427
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
4428
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
4429
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
4430
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
4431
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
4432
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
4433
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
4434
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
4435
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
4436
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
4437
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
4438
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
4439
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
4440
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
4441
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
4442
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
4443
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
4444
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
4445
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
4446
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
4447
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
4448
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
4449
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
4450
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
4451
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
4452
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
4453
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
4454
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
4455
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
4456
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
4457
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
4458
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
4459
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
4460
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
4461
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
4462
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
4463
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
4464
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
4465
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
4466
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
4467
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
4468
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
4469
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
4470
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
4471
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
4472
      "\xe2\xb1\xa6"     => "\xc8\xbe",
4473
      "\xe2\xb1\xa5"     => "\xc8\xba",
4474
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
4475
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
4476
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
4477
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
4478
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
4479
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
4480
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
4481
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
4482
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
4483
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
4484
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
4485
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
4486
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
4487
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
4488
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
4489
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
4490
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
4491
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
4492
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
4493
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
4494
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
4495
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
4496
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
4497
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
4498
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
4499
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
4500
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
4501
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
4502
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
4503
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
4504
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
4505
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
4506
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
4507
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
4508
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
4509
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
4510
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
4511
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
4512
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
4513
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
4514
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
4515
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
4516
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
4517
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
4518
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
4519
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
4520
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
4521
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
4522
      "\xe2\x86\x84"     => "\xe2\x86\x83",
4523
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
4524
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
4525
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
4526
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
4527
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
4528
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
4529
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
4530
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
4531
      "\xe1\xbe\xbe"     => "\xce\x99",
4532
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
4533
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
4534
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
4535
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
4536
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
4537
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
4538
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
4539
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
4540
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
4541
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
4542
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
4543
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
4544
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
4545
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
4546
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
4547
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
4548
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
4549
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
4550
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
4551
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
4552
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
4553
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
4554
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
4555
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
4556
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
4557
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
4558
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
4559
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
4560
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
4561
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
4562
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
4563
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
4564
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
4565
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
4566
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
4567
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
4568
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
4569
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
4570
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
4571
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
4572
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
4573
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
4574
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
4575
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
4576
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
4577
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
4578
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
4579
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
4580
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
4581
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
4582
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
4583
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
4584
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
4585
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
4586
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
4587
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
4588
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
4589
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
4590
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
4591
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
4592
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
4593
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
4594
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
4595
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
4596
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
4597
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
4598
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
4599
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
4600
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
4601
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
4602
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
4603
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
4604
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
4605
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
4606
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
4607
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
4608
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
4609
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
4610
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
4611
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
4612
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
4613
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
4614
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
4615
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
4616
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
4617
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
4618
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
4619
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
4620
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
4621
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
4622
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
4623
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
4624
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
4625
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
4626
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
4627
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
4628
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
4629
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
4630
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
4631
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
4632
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
4633
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
4634
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
4635
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
4636
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
4637
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
4638
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
4639
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
4640
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
4641
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
4642
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
4643
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
4644
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
4645
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
4646
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
4647
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
4648
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
4649
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
4650
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
4651
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
4652
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
4653
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
4654
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
4655
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
4656
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
4657
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
4658
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
4659
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
4660
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
4661
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
4662
      "\xe1\xba\xad"     => "\xe1\xba\xac",
4663
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
4664
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
4665
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
4666
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
4667
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
4668
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
4669
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
4670
      "\xe1\xba\x95"     => "\xe1\xba\x94",
4671
      "\xe1\xba\x93"     => "\xe1\xba\x92",
4672
      "\xe1\xba\x91"     => "\xe1\xba\x90",
4673
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
4674
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
4675
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
4676
      "\xe1\xba\x89"     => "\xe1\xba\x88",
4677
      "\xe1\xba\x87"     => "\xe1\xba\x86",
4678
      "\xe1\xba\x85"     => "\xe1\xba\x84",
4679
      "\xe1\xba\x83"     => "\xe1\xba\x82",
4680
      "\xe1\xba\x81"     => "\xe1\xba\x80",
4681
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
4682
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
4683
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
4684
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
4685
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
4686
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
4687
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
4688
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
4689
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
4690
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
4691
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
4692
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
4693
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
4694
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
4695
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
4696
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
4697
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
4698
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
4699
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
4700
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
4701
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
4702
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
4703
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
4704
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
4705
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
4706
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
4707
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
4708
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
4709
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
4710
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
4711
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
4712
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
4713
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
4714
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
4715
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
4716
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
4717
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
4718
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
4719
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
4720
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
4721
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
4722
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
4723
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
4724
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
4725
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
4726
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
4727
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
4728
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
4729
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
4730
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
4731
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
4732
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
4733
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
4734
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
4735
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
4736
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
4737
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
4738
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
4739
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
4740
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
4741
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
4742
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
4743
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
4744
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
4745
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
4746
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
4747
      "\xd6\x86"         => "\xd5\x96",
4748
      "\xd6\x85"         => "\xd5\x95",
4749
      "\xd6\x84"         => "\xd5\x94",
4750
      "\xd6\x83"         => "\xd5\x93",
4751
      "\xd6\x82"         => "\xd5\x92",
4752
      "\xd6\x81"         => "\xd5\x91",
4753
      "\xd6\x80"         => "\xd5\x90",
4754
      "\xd5\xbf"         => "\xd5\x8f",
4755
      "\xd5\xbe"         => "\xd5\x8e",
4756
      "\xd5\xbd"         => "\xd5\x8d",
4757
      "\xd5\xbc"         => "\xd5\x8c",
4758
      "\xd5\xbb"         => "\xd5\x8b",
4759
      "\xd5\xba"         => "\xd5\x8a",
4760
      "\xd5\xb9"         => "\xd5\x89",
4761
      "\xd5\xb8"         => "\xd5\x88",
4762
      "\xd5\xb7"         => "\xd5\x87",
4763
      "\xd5\xb6"         => "\xd5\x86",
4764
      "\xd5\xb5"         => "\xd5\x85",
4765
      "\xd5\xb4"         => "\xd5\x84",
4766
      "\xd5\xb3"         => "\xd5\x83",
4767
      "\xd5\xb2"         => "\xd5\x82",
4768
      "\xd5\xb1"         => "\xd5\x81",
4769
      "\xd5\xb0"         => "\xd5\x80",
4770
      "\xd5\xaf"         => "\xd4\xbf",
4771
      "\xd5\xae"         => "\xd4\xbe",
4772
      "\xd5\xad"         => "\xd4\xbd",
4773
      "\xd5\xac"         => "\xd4\xbc",
4774
      "\xd5\xab"         => "\xd4\xbb",
4775
      "\xd5\xaa"         => "\xd4\xba",
4776
      "\xd5\xa9"         => "\xd4\xb9",
4777
      "\xd5\xa8"         => "\xd4\xb8",
4778
      "\xd5\xa7"         => "\xd4\xb7",
4779
      "\xd5\xa6"         => "\xd4\xb6",
4780
      "\xd5\xa5"         => "\xd4\xb5",
4781
      "\xd5\xa4"         => "\xd4\xb4",
4782
      "\xd5\xa3"         => "\xd4\xb3",
4783
      "\xd5\xa2"         => "\xd4\xb2",
4784
      "\xd5\xa1"         => "\xd4\xb1",
4785
      "\xd4\xa5"         => "\xd4\xa4",
4786
      "\xd4\xa3"         => "\xd4\xa2",
4787
      "\xd4\xa1"         => "\xd4\xa0",
4788
      "\xd4\x9f"         => "\xd4\x9e",
4789
      "\xd4\x9d"         => "\xd4\x9c",
4790
      "\xd4\x9b"         => "\xd4\x9a",
4791
      "\xd4\x99"         => "\xd4\x98",
4792
      "\xd4\x97"         => "\xd4\x96",
4793
      "\xd4\x95"         => "\xd4\x94",
4794
      "\xd4\x93"         => "\xd4\x92",
4795
      "\xd4\x91"         => "\xd4\x90",
4796
      "\xd4\x8f"         => "\xd4\x8e",
4797
      "\xd4\x8d"         => "\xd4\x8c",
4798
      "\xd4\x8b"         => "\xd4\x8a",
4799
      "\xd4\x89"         => "\xd4\x88",
4800
      "\xd4\x87"         => "\xd4\x86",
4801
      "\xd4\x85"         => "\xd4\x84",
4802
      "\xd4\x83"         => "\xd4\x82",
4803
      "\xd4\x81"         => "\xd4\x80",
4804
      "\xd3\xbf"         => "\xd3\xbe",
4805
      "\xd3\xbd"         => "\xd3\xbc",
4806
      "\xd3\xbb"         => "\xd3\xba",
4807
      "\xd3\xb9"         => "\xd3\xb8",
4808
      "\xd3\xb7"         => "\xd3\xb6",
4809
      "\xd3\xb5"         => "\xd3\xb4",
4810
      "\xd3\xb3"         => "\xd3\xb2",
4811
      "\xd3\xb1"         => "\xd3\xb0",
4812
      "\xd3\xaf"         => "\xd3\xae",
4813
      "\xd3\xad"         => "\xd3\xac",
4814
      "\xd3\xab"         => "\xd3\xaa",
4815
      "\xd3\xa9"         => "\xd3\xa8",
4816
      "\xd3\xa7"         => "\xd3\xa6",
4817
      "\xd3\xa5"         => "\xd3\xa4",
4818
      "\xd3\xa3"         => "\xd3\xa2",
4819
      "\xd3\xa1"         => "\xd3\xa0",
4820
      "\xd3\x9f"         => "\xd3\x9e",
4821
      "\xd3\x9d"         => "\xd3\x9c",
4822
      "\xd3\x9b"         => "\xd3\x9a",
4823
      "\xd3\x99"         => "\xd3\x98",
4824
      "\xd3\x97"         => "\xd3\x96",
4825
      "\xd3\x95"         => "\xd3\x94",
4826
      "\xd3\x93"         => "\xd3\x92",
4827
      "\xd3\x91"         => "\xd3\x90",
4828
      "\xd3\x8f"         => "\xd3\x80",
4829
      "\xd3\x8e"         => "\xd3\x8d",
4830
      "\xd3\x8c"         => "\xd3\x8b",
4831
      "\xd3\x8a"         => "\xd3\x89",
4832
      "\xd3\x88"         => "\xd3\x87",
4833
      "\xd3\x86"         => "\xd3\x85",
4834
      "\xd3\x84"         => "\xd3\x83",
4835
      "\xd3\x82"         => "\xd3\x81",
4836
      "\xd2\xbf"         => "\xd2\xbe",
4837
      "\xd2\xbd"         => "\xd2\xbc",
4838
      "\xd2\xbb"         => "\xd2\xba",
4839
      "\xd2\xb9"         => "\xd2\xb8",
4840
      "\xd2\xb7"         => "\xd2\xb6",
4841
      "\xd2\xb5"         => "\xd2\xb4",
4842
      "\xd2\xb3"         => "\xd2\xb2",
4843
      "\xd2\xb1"         => "\xd2\xb0",
4844
      "\xd2\xaf"         => "\xd2\xae",
4845
      "\xd2\xad"         => "\xd2\xac",
4846
      "\xd2\xab"         => "\xd2\xaa",
4847
      "\xd2\xa9"         => "\xd2\xa8",
4848
      "\xd2\xa7"         => "\xd2\xa6",
4849
      "\xd2\xa5"         => "\xd2\xa4",
4850
      "\xd2\xa3"         => "\xd2\xa2",
4851
      "\xd2\xa1"         => "\xd2\xa0",
4852
      "\xd2\x9f"         => "\xd2\x9e",
4853
      "\xd2\x9d"         => "\xd2\x9c",
4854
      "\xd2\x9b"         => "\xd2\x9a",
4855
      "\xd2\x99"         => "\xd2\x98",
4856
      "\xd2\x97"         => "\xd2\x96",
4857
      "\xd2\x95"         => "\xd2\x94",
4858
      "\xd2\x93"         => "\xd2\x92",
4859
      "\xd2\x91"         => "\xd2\x90",
4860
      "\xd2\x8f"         => "\xd2\x8e",
4861
      "\xd2\x8d"         => "\xd2\x8c",
4862
      "\xd2\x8b"         => "\xd2\x8a",
4863
      "\xd2\x81"         => "\xd2\x80",
4864
      "\xd1\xbf"         => "\xd1\xbe",
4865
      "\xd1\xbd"         => "\xd1\xbc",
4866
      "\xd1\xbb"         => "\xd1\xba",
4867
      "\xd1\xb9"         => "\xd1\xb8",
4868
      "\xd1\xb7"         => "\xd1\xb6",
4869
      "\xd1\xb5"         => "\xd1\xb4",
4870
      "\xd1\xb3"         => "\xd1\xb2",
4871
      "\xd1\xb1"         => "\xd1\xb0",
4872
      "\xd1\xaf"         => "\xd1\xae",
4873
      "\xd1\xad"         => "\xd1\xac",
4874
      "\xd1\xab"         => "\xd1\xaa",
4875
      "\xd1\xa9"         => "\xd1\xa8",
4876
      "\xd1\xa7"         => "\xd1\xa6",
4877
      "\xd1\xa5"         => "\xd1\xa4",
4878
      "\xd1\xa3"         => "\xd1\xa2",
4879
      "\xd1\xa1"         => "\xd1\xa0",
4880
      "\xd1\x9f"         => "\xd0\x8f",
4881
      "\xd1\x9e"         => "\xd0\x8e",
4882
      "\xd1\x9d"         => "\xd0\x8d",
4883
      "\xd1\x9c"         => "\xd0\x8c",
4884
      "\xd1\x9b"         => "\xd0\x8b",
4885
      "\xd1\x9a"         => "\xd0\x8a",
4886
      "\xd1\x99"         => "\xd0\x89",
4887
      "\xd1\x98"         => "\xd0\x88",
4888
      "\xd1\x97"         => "\xd0\x87",
4889
      "\xd1\x96"         => "\xd0\x86",
4890
      "\xd1\x95"         => "\xd0\x85",
4891
      "\xd1\x94"         => "\xd0\x84",
4892
      "\xd1\x93"         => "\xd0\x83",
4893
      "\xd1\x92"         => "\xd0\x82",
4894
      "\xd1\x91"         => "\xd0\x81",
4895
      "\xd1\x90"         => "\xd0\x80",
4896
      "\xd1\x8f"         => "\xd0\xaf",
4897
      "\xd1\x8e"         => "\xd0\xae",
4898
      "\xd1\x8d"         => "\xd0\xad",
4899
      "\xd1\x8c"         => "\xd0\xac",
4900
      "\xd1\x8b"         => "\xd0\xab",
4901
      "\xd1\x8a"         => "\xd0\xaa",
4902
      "\xd1\x89"         => "\xd0\xa9",
4903
      "\xd1\x88"         => "\xd0\xa8",
4904
      "\xd1\x87"         => "\xd0\xa7",
4905
      "\xd1\x86"         => "\xd0\xa6",
4906
      "\xd1\x85"         => "\xd0\xa5",
4907
      "\xd1\x84"         => "\xd0\xa4",
4908
      "\xd1\x83"         => "\xd0\xa3",
4909
      "\xd1\x82"         => "\xd0\xa2",
4910
      "\xd1\x81"         => "\xd0\xa1",
4911
      "\xd1\x80"         => "\xd0\xa0",
4912
      "\xd0\xbf"         => "\xd0\x9f",
4913
      "\xd0\xbe"         => "\xd0\x9e",
4914
      "\xd0\xbd"         => "\xd0\x9d",
4915
      "\xd0\xbc"         => "\xd0\x9c",
4916
      "\xd0\xbb"         => "\xd0\x9b",
4917
      "\xd0\xba"         => "\xd0\x9a",
4918
      "\xd0\xb9"         => "\xd0\x99",
4919
      "\xd0\xb8"         => "\xd0\x98",
4920
      "\xd0\xb7"         => "\xd0\x97",
4921
      "\xd0\xb6"         => "\xd0\x96",
4922
      "\xd0\xb5"         => "\xd0\x95",
4923
      "\xd0\xb4"         => "\xd0\x94",
4924
      "\xd0\xb3"         => "\xd0\x93",
4925
      "\xd0\xb2"         => "\xd0\x92",
4926
      "\xd0\xb1"         => "\xd0\x91",
4927
      "\xd0\xb0"         => "\xd0\x90",
4928
      "\xcf\xbb"         => "\xcf\xba",
4929
      "\xcf\xb8"         => "\xcf\xb7",
4930
      "\xcf\xb5"         => "\xce\x95",
4931
      "\xcf\xb2"         => "\xcf\xb9",
4932
      "\xcf\xb1"         => "\xce\xa1",
4933
      "\xcf\xb0"         => "\xce\x9a",
4934
      "\xcf\xaf"         => "\xcf\xae",
4935
      "\xcf\xad"         => "\xcf\xac",
4936
      "\xcf\xab"         => "\xcf\xaa",
4937
      "\xcf\xa9"         => "\xcf\xa8",
4938
      "\xcf\xa7"         => "\xcf\xa6",
4939
      "\xcf\xa5"         => "\xcf\xa4",
4940
      "\xcf\xa3"         => "\xcf\xa2",
4941
      "\xcf\xa1"         => "\xcf\xa0",
4942
      "\xcf\x9f"         => "\xcf\x9e",
4943
      "\xcf\x9d"         => "\xcf\x9c",
4944
      "\xcf\x9b"         => "\xcf\x9a",
4945
      "\xcf\x99"         => "\xcf\x98",
4946
      "\xcf\x97"         => "\xcf\x8f",
4947
      "\xcf\x96"         => "\xce\xa0",
4948
      "\xcf\x95"         => "\xce\xa6",
4949
      "\xcf\x91"         => "\xce\x98",
4950
      "\xcf\x90"         => "\xce\x92",
4951
      "\xcf\x8e"         => "\xce\x8f",
4952
      "\xcf\x8d"         => "\xce\x8e",
4953
      "\xcf\x8c"         => "\xce\x8c",
4954
      "\xcf\x8b"         => "\xce\xab",
4955
      "\xcf\x8a"         => "\xce\xaa",
4956
      "\xcf\x89"         => "\xce\xa9",
4957
      "\xcf\x88"         => "\xce\xa8",
4958
      "\xcf\x87"         => "\xce\xa7",
4959
      "\xcf\x86"         => "\xce\xa6",
4960
      "\xcf\x85"         => "\xce\xa5",
4961
      "\xcf\x84"         => "\xce\xa4",
4962
      "\xcf\x83"         => "\xce\xa3",
4963
      "\xcf\x82"         => "\xce\xa3",
4964
      "\xcf\x81"         => "\xce\xa1",
4965
      "\xcf\x80"         => "\xce\xa0",
4966
      "\xce\xbf"         => "\xce\x9f",
4967
      "\xce\xbe"         => "\xce\x9e",
4968
      "\xce\xbd"         => "\xce\x9d",
4969
      "\xce\xbc"         => "\xce\x9c",
4970
      "\xce\xbb"         => "\xce\x9b",
4971
      "\xce\xba"         => "\xce\x9a",
4972
      "\xce\xb9"         => "\xce\x99",
4973
      "\xce\xb8"         => "\xce\x98",
4974
      "\xce\xb7"         => "\xce\x97",
4975
      "\xce\xb6"         => "\xce\x96",
4976
      "\xce\xb5"         => "\xce\x95",
4977
      "\xce\xb4"         => "\xce\x94",
4978
      "\xce\xb3"         => "\xce\x93",
4979
      "\xce\xb2"         => "\xce\x92",
4980
      "\xce\xb1"         => "\xce\x91",
4981
      "\xce\xaf"         => "\xce\x8a",
4982
      "\xce\xae"         => "\xce\x89",
4983
      "\xce\xad"         => "\xce\x88",
4984
      "\xce\xac"         => "\xce\x86",
4985
      "\xcd\xbd"         => "\xcf\xbf",
4986
      "\xcd\xbc"         => "\xcf\xbe",
4987
      "\xcd\xbb"         => "\xcf\xbd",
4988
      "\xcd\xb7"         => "\xcd\xb6",
4989
      "\xcd\xb3"         => "\xcd\xb2",
4990
      "\xcd\xb1"         => "\xcd\xb0",
4991
      "\xca\x92"         => "\xc6\xb7",
4992
      "\xca\x8c"         => "\xc9\x85",
4993
      "\xca\x8b"         => "\xc6\xb2",
4994
      "\xca\x8a"         => "\xc6\xb1",
4995
      "\xca\x89"         => "\xc9\x84",
4996
      "\xca\x88"         => "\xc6\xae",
4997
      "\xca\x83"         => "\xc6\xa9",
4998
      "\xca\x80"         => "\xc6\xa6",
4999
      "\xc9\xbd"         => "\xe2\xb1\xa4",
5000
      "\xc9\xb5"         => "\xc6\x9f",
5001
      "\xc9\xb2"         => "\xc6\x9d",
5002
      "\xc9\xb1"         => "\xe2\xb1\xae",
5003
      "\xc9\xaf"         => "\xc6\x9c",
5004
      "\xc9\xab"         => "\xe2\xb1\xa2",
5005
      "\xc9\xa9"         => "\xc6\x96",
5006
      "\xc9\xa8"         => "\xc6\x97",
5007
      "\xc9\xa5"         => "\xea\x9e\x8d",
5008
      "\xc9\xa3"         => "\xc6\x94",
5009
      "\xc9\xa0"         => "\xc6\x93",
5010
      "\xc9\x9b"         => "\xc6\x90",
5011
      "\xc9\x99"         => "\xc6\x8f",
5012
      "\xc9\x97"         => "\xc6\x8a",
5013
      "\xc9\x96"         => "\xc6\x89",
5014
      "\xc9\x94"         => "\xc6\x86",
5015
      "\xc9\x93"         => "\xc6\x81",
5016
      "\xc9\x92"         => "\xe2\xb1\xb0",
5017
      "\xc9\x91"         => "\xe2\xb1\xad",
5018
      "\xc9\x90"         => "\xe2\xb1\xaf",
5019
      "\xc9\x8f"         => "\xc9\x8e",
5020
      "\xc9\x8d"         => "\xc9\x8c",
5021
      "\xc9\x8b"         => "\xc9\x8a",
5022
      "\xc9\x89"         => "\xc9\x88",
5023
      "\xc9\x87"         => "\xc9\x86",
5024
      "\xc9\x82"         => "\xc9\x81",
5025
      "\xc9\x80"         => "\xe2\xb1\xbf",
5026
      "\xc8\xbf"         => "\xe2\xb1\xbe",
5027
      "\xc8\xbc"         => "\xc8\xbb",
5028
      "\xc8\xb3"         => "\xc8\xb2",
5029
      "\xc8\xb1"         => "\xc8\xb0",
5030
      "\xc8\xaf"         => "\xc8\xae",
5031
      "\xc8\xad"         => "\xc8\xac",
5032
      "\xc8\xab"         => "\xc8\xaa",
5033
      "\xc8\xa9"         => "\xc8\xa8",
5034
      "\xc8\xa7"         => "\xc8\xa6",
5035
      "\xc8\xa5"         => "\xc8\xa4",
5036
      "\xc8\xa3"         => "\xc8\xa2",
5037
      "\xc8\x9f"         => "\xc8\x9e",
5038
      "\xc8\x9d"         => "\xc8\x9c",
5039
      "\xc8\x9b"         => "\xc8\x9a",
5040
      "\xc8\x99"         => "\xc8\x98",
5041
      "\xc8\x97"         => "\xc8\x96",
5042
      "\xc8\x95"         => "\xc8\x94",
5043
      "\xc8\x93"         => "\xc8\x92",
5044
      "\xc8\x91"         => "\xc8\x90",
5045
      "\xc8\x8f"         => "\xc8\x8e",
5046
      "\xc8\x8d"         => "\xc8\x8c",
5047
      "\xc8\x8b"         => "\xc8\x8a",
5048
      "\xc8\x89"         => "\xc8\x88",
5049
      "\xc8\x87"         => "\xc8\x86",
5050
      "\xc8\x85"         => "\xc8\x84",
5051
      "\xc8\x83"         => "\xc8\x82",
5052
      "\xc8\x81"         => "\xc8\x80",
5053
      "\xc7\xbf"         => "\xc7\xbe",
5054
      "\xc7\xbd"         => "\xc7\xbc",
5055
      "\xc7\xbb"         => "\xc7\xba",
5056
      "\xc7\xb9"         => "\xc7\xb8",
5057
      "\xc7\xb5"         => "\xc7\xb4",
5058
      "\xc7\xb3"         => "\xc7\xb2",
5059
      "\xc7\xaf"         => "\xc7\xae",
5060
      "\xc7\xad"         => "\xc7\xac",
5061
      "\xc7\xab"         => "\xc7\xaa",
5062
      "\xc7\xa9"         => "\xc7\xa8",
5063
      "\xc7\xa7"         => "\xc7\xa6",
5064
      "\xc7\xa5"         => "\xc7\xa4",
5065
      "\xc7\xa3"         => "\xc7\xa2",
5066
      "\xc7\xa1"         => "\xc7\xa0",
5067
      "\xc7\x9f"         => "\xc7\x9e",
5068
      "\xc7\x9d"         => "\xc6\x8e",
5069
      "\xc7\x9c"         => "\xc7\x9b",
5070
      "\xc7\x9a"         => "\xc7\x99",
5071
      "\xc7\x98"         => "\xc7\x97",
5072
      "\xc7\x96"         => "\xc7\x95",
5073
      "\xc7\x94"         => "\xc7\x93",
5074
      "\xc7\x92"         => "\xc7\x91",
5075
      "\xc7\x90"         => "\xc7\x8f",
5076
      "\xc7\x8e"         => "\xc7\x8d",
5077
      "\xc7\x8c"         => "\xc7\x8b",
5078
      "\xc7\x89"         => "\xc7\x88",
5079
      "\xc7\x86"         => "\xc7\x85",
5080
      "\xc6\xbf"         => "\xc7\xb7",
5081
      "\xc6\xbd"         => "\xc6\xbc",
5082
      "\xc6\xb9"         => "\xc6\xb8",
5083
      "\xc6\xb6"         => "\xc6\xb5",
5084
      "\xc6\xb4"         => "\xc6\xb3",
5085
      "\xc6\xb0"         => "\xc6\xaf",
5086
      "\xc6\xad"         => "\xc6\xac",
5087
      "\xc6\xa8"         => "\xc6\xa7",
5088
      "\xc6\xa5"         => "\xc6\xa4",
5089
      "\xc6\xa3"         => "\xc6\xa2",
5090
      "\xc6\xa1"         => "\xc6\xa0",
5091
      "\xc6\x9e"         => "\xc8\xa0",
5092
      "\xc6\x9a"         => "\xc8\xbd",
5093
      "\xc6\x99"         => "\xc6\x98",
5094
      "\xc6\x95"         => "\xc7\xb6",
5095
      "\xc6\x92"         => "\xc6\x91",
5096
      "\xc6\x8c"         => "\xc6\x8b",
5097
      "\xc6\x88"         => "\xc6\x87",
5098
      "\xc6\x85"         => "\xc6\x84",
5099
      "\xc6\x83"         => "\xc6\x82",
5100
      "\xc6\x80"         => "\xc9\x83",
5101
      "\xc5\xbf"         => "\x53",
5102
      "\xc5\xbe"         => "\xc5\xbd",
5103
      "\xc5\xbc"         => "\xc5\xbb",
5104
      "\xc5\xba"         => "\xc5\xb9",
5105
      "\xc5\xb7"         => "\xc5\xb6",
5106
      "\xc5\xb5"         => "\xc5\xb4",
5107
      "\xc5\xb3"         => "\xc5\xb2",
5108
      "\xc5\xb1"         => "\xc5\xb0",
5109
      "\xc5\xaf"         => "\xc5\xae",
5110
      "\xc5\xad"         => "\xc5\xac",
5111
      "\xc5\xab"         => "\xc5\xaa",
5112
      "\xc5\xa9"         => "\xc5\xa8",
5113
      "\xc5\xa7"         => "\xc5\xa6",
5114
      "\xc5\xa5"         => "\xc5\xa4",
5115
      "\xc5\xa3"         => "\xc5\xa2",
5116
      "\xc5\xa1"         => "\xc5\xa0",
5117
      "\xc5\x9f"         => "\xc5\x9e",
5118
      "\xc5\x9d"         => "\xc5\x9c",
5119
      "\xc5\x9b"         => "\xc5\x9a",
5120
      "\xc5\x99"         => "\xc5\x98",
5121
      "\xc5\x97"         => "\xc5\x96",
5122
      "\xc5\x95"         => "\xc5\x94",
5123
      "\xc5\x93"         => "\xc5\x92",
5124
      "\xc5\x91"         => "\xc5\x90",
5125
      "\xc5\x8f"         => "\xc5\x8e",
5126
      "\xc5\x8d"         => "\xc5\x8c",
5127
      "\xc5\x8b"         => "\xc5\x8a",
5128
      "\xc5\x88"         => "\xc5\x87",
5129
      "\xc5\x86"         => "\xc5\x85",
5130
      "\xc5\x84"         => "\xc5\x83",
5131
      "\xc5\x82"         => "\xc5\x81",
5132
      "\xc5\x80"         => "\xc4\xbf",
5133
      "\xc4\xbe"         => "\xc4\xbd",
5134
      "\xc4\xbc"         => "\xc4\xbb",
5135
      "\xc4\xba"         => "\xc4\xb9",
5136
      "\xc4\xb7"         => "\xc4\xb6",
5137
      "\xc4\xb5"         => "\xc4\xb4",
5138
      "\xc4\xb3"         => "\xc4\xb2",
5139
      "\xc4\xb1"         => "\x49",
5140
      "\xc4\xaf"         => "\xc4\xae",
5141
      "\xc4\xad"         => "\xc4\xac",
5142
      "\xc4\xab"         => "\xc4\xaa",
5143
      "\xc4\xa9"         => "\xc4\xa8",
5144
      "\xc4\xa7"         => "\xc4\xa6",
5145
      "\xc4\xa5"         => "\xc4\xa4",
5146
      "\xc4\xa3"         => "\xc4\xa2",
5147
      "\xc4\xa1"         => "\xc4\xa0",
5148
      "\xc4\x9f"         => "\xc4\x9e",
5149
      "\xc4\x9d"         => "\xc4\x9c",
5150
      "\xc4\x9b"         => "\xc4\x9a",
5151
      "\xc4\x99"         => "\xc4\x98",
5152
      "\xc4\x97"         => "\xc4\x96",
5153
      "\xc4\x95"         => "\xc4\x94",
5154
      "\xc4\x93"         => "\xc4\x92",
5155
      "\xc4\x91"         => "\xc4\x90",
5156
      "\xc4\x8f"         => "\xc4\x8e",
5157
      "\xc4\x8d"         => "\xc4\x8c",
5158
      "\xc4\x8b"         => "\xc4\x8a",
5159
      "\xc4\x89"         => "\xc4\x88",
5160
      "\xc4\x87"         => "\xc4\x86",
5161
      "\xc4\x85"         => "\xc4\x84",
5162
      "\xc4\x83"         => "\xc4\x82",
5163
      "\xc4\x81"         => "\xc4\x80",
5164
      "\xc3\xbf"         => "\xc5\xb8",
5165
      "\xc3\xbe"         => "\xc3\x9e",
5166
      "\xc3\xbd"         => "\xc3\x9d",
5167
      "\xc3\xbc"         => "\xc3\x9c",
5168
      "\xc3\xbb"         => "\xc3\x9b",
5169
      "\xc3\xba"         => "\xc3\x9a",
5170
      "\xc3\xb9"         => "\xc3\x99",
5171
      "\xc3\xb8"         => "\xc3\x98",
5172
      "\xc3\xb6"         => "\xc3\x96",
5173
      "\xc3\xb5"         => "\xc3\x95",
5174
      "\xc3\xb4"         => "\xc3\x94",
5175
      "\xc3\xb3"         => "\xc3\x93",
5176
      "\xc3\xb2"         => "\xc3\x92",
5177
      "\xc3\xb1"         => "\xc3\x91",
5178
      "\xc3\xb0"         => "\xc3\x90",
5179
      "\xc3\xaf"         => "\xc3\x8f",
5180
      "\xc3\xae"         => "\xc3\x8e",
5181
      "\xc3\xad"         => "\xc3\x8d",
5182
      "\xc3\xac"         => "\xc3\x8c",
5183
      "\xc3\xab"         => "\xc3\x8b",
5184
      "\xc3\xaa"         => "\xc3\x8a",
5185
      "\xc3\xa9"         => "\xc3\x89",
5186
      "\xc3\xa8"         => "\xc3\x88",
5187
      "\xc3\xa7"         => "\xc3\x87",
5188
      "\xc3\xa6"         => "\xc3\x86",
5189
      "\xc3\xa5"         => "\xc3\x85",
5190
      "\xc3\xa4"         => "\xc3\x84",
5191
      "\xc3\xa3"         => "\xc3\x83",
5192
      "\xc3\xa2"         => "\xc3\x82",
5193
      "\xc3\xa1"         => "\xc3\x81",
5194
      "\xc3\xa0"         => "\xc3\x80",
5195
      "\xc2\xb5"         => "\xce\x9c",
5196
      "\x7a"             => "\x5a",
5197
      "\x79"             => "\x59",
5198 1
      "\x78"             => "\x58",
5199
      "\x77"             => "\x57",
5200 1
      "\x76"             => "\x56",
5201
      "\x75"             => "\x55",
5202
      "\x74"             => "\x54",
5203
      "\x73"             => "\x53",
5204
      "\x72"             => "\x52",
5205
      "\x71"             => "\x51",
5206
      "\x70"             => "\x50",
5207
      "\x6f"             => "\x4f",
5208
      "\x6e"             => "\x4e",
5209
      "\x6d"             => "\x4d",
5210
      "\x6c"             => "\x4c",
5211
      "\x6b"             => "\x4b",
5212
      "\x6a"             => "\x4a",
5213
      "\x69"             => "\x49",
5214
      "\x68"             => "\x48",
5215 1
      "\x67"             => "\x47",
5216
      "\x66"             => "\x46",
5217
      "\x65"             => "\x45",
5218
      "\x64"             => "\x44",
5219
      "\x63"             => "\x43",
5220
      "\x62"             => "\x42",
5221
      "\x61"             => "\x41",
5222
5223
    );
5224
5225
    return $case;
5226
  }
5227
5228
  /**
   * Translate characters or replace sub-strings.
   *
   * Two call forms are supported:
   *  - strtr($s, $from, $to): $from and $to are split with self::str_split() and paired
   *    element by element; the longer list is cut down to the length of the shorter one.
   *  - strtr($s, $pairs): $pairs is an array of "search => replacement" entries that is
   *    handed to PHP's native strtr() unchanged.
   *
   * @param string       $s    The string being translated.
   * @param string|array $from The characters to replace (three-argument form) or an array
   *                           of replacement pairs (two-argument form).
   * @param string       $to   The replacement characters; keep the INF default to use the
   *                           two-argument form.
   *
   * @return string
   *
   * @throws \InvalidArgumentException If $to is omitted and $from is not an array.
   */
  public static function strtr($s, $from, $to = INF)
  {
    if (INF !== $to) {
      $from = self::str_split($from);
      $to = self::str_split($to);

      // keep only as many entries as both lists can provide, so every key gets a value
      $pairCount = min(count($from), count($to));
      $from = array_combine(
          array_slice($from, 0, $pairCount),
          array_slice($to, 0, $pairCount)
      );
    } elseif (!is_array($from)) {
      // The two-argument form of the native strtr() only accepts an array of pairs;
      // fail with a clear message instead of passing an unsupported value through.
      throw new \InvalidArgumentException(
          'UTF8::strtr() expects $from to be an array of replacement pairs when $to is omitted.'
      );
    }

    return strtr($s, $from);
  }
5256
5257
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The requested window is first cut out of $main_str, then $str is shortened to the
   * length of that window (as reported by self::strlen()) before both parts are compared.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from
   *                                    the end of the string.
   * @param int     $length             The length of the comparison. The default value is the largest of the length
   *                                    of the str compared to the length of main_str less the offset.
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    // only the boolean TRUE switches to the case-insensitive comparison
    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5277
5278 6
  /**
   * Case-insensitive string comparison.
   *
   * Both arguments are passed through self::strtocasefold() and the results are
   * compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5290
5291 6
  /**
   * Uppercase for all words in the string.
   *
   * The string is split on single spaces and every word that is not listed in
   * $exceptions is passed through self::ucfirst(). The re-joined result is passed
   * through self::ucfirst() once more, so the beginning of the string is processed
   * even when the first word is listed in $exceptions.
   *
   * @param string $string     The input string.
   * @param array  $exceptions Words (exact, case-sensitive match) that are left untouched.
   *
   * @return string The converted string, or an empty string for empty input.
   */
  public static function ucwords($string, $exceptions = array())
  {
    // "0" is falsy in PHP but still a valid one-word string, so it must not be treated as empty
    if (!$string && $string !== '0') {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (explode(' ', $string) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
5332
5333
  /**
   * Format a number with grouped thousands.
   *
   * Separators longer than one byte (e.g. UTF-8 characters) are supported: in that case
   * the number is formatted with the plain "." / "," separators first and both are then
   * swapped for the requested strings in a single pass.
   *
   * @param float  $number        The number being formatted.
   * @param int    $decimals      The number of decimal digits.
   * @param string $dec_point     The separator for the decimal point.
   * @param string $thousands_sep The thousands separator.
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    // isset($x[1]) is true as soon as a separator is longer than one byte
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() with an array replaces all keys in one pass. A sequential replacement
      // (first "." then ",") would re-replace a "," that was just inserted as decimal
      // point, e.g. for $dec_point = ',' combined with a multi-byte $thousands_sep.
      // This path yields the same output as the native call on PHP versions with
      // multi-byte separator support, so no version check is required.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
5363
5364
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: this is only a thin wrapper around the native "str_replace()",
   *       which is already UTF-8 safe.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The needle to look for. An array may be passed to
   *                       search for several needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The value that replaces each found needle. An array
   *                       may be passed to provide several replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The haystack: a string or an array of strings. If an
   *                       array is given, every entry is processed and an
   *                       array is returned.
   *                       </p>
   * @param int   $count   [optional] Receives the number of replacements performed.
   *
   * @return mixed A string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
5398 6
5399
  /**
   * Case-insensitive, UTF-8 aware version of "str_replace()".
   *
   * Every needle is turned into a quoted, case-insensitive Unicode regular
   * expression and applied with preg_replace(). An empty needle is mapped to
   * a pattern that can never match, so it replaces nothing.
   *
   * @param string|array $search  The needle, or an array of needles.
   * @param string|array $replace The replacement, or an array of replacements.
   * @param string|array $subject The string (or array of strings) to work on.
   * @param null|int     $count   [optional] Receives the number of replacements performed.
   *
   * @return string|array|null The subject with all replacements applied, as returned by
   *                           preg_replace() (null if preg_replace() reports an error).
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // "start of string preceded by a character" can never match
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // The replacement must be inserted literally, like str_ireplace() does.
    // preg_replace() would otherwise interpret "$1", "\1" or "\\" inside it
    // as back-references / escapes.
    $literal = array(
        '\\' => '\\\\',
        '$'  => '\\$',
    );

    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = strtr((string)$value, $literal);
      }
    } else {
      $replacement = strtr((string)$replace, $literal);
    }

    $replacements = 0;
    $subject = preg_replace($patterns, $replacement, $subject, -1, $replacements);
    $count = $replacements;

    return $subject;
  }
5427
5428
  /**
   * Lower-case the first character of a string.
   *
   * The first character is taken with substr(), lower-cased with
   * strtolower() and glued back in front of the rest of the string.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function lcfirst($str)
  {
    $firstChar = self::substr($str, 0, 1);
    $loweredFirst = self::strtolower($firstChar);
    $remainder = self::substr($str, 1);

    return $loweredFirst . $remainder;
  }
5439 10
5440 10
  /**
   * Find the position of the last occurrence of a string, ignoring case.
   *
   * Both haystack and needle are lower-cased with strtolower() and the
   * search itself is delegated to strrpos().
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int|false The value returned by strrpos() for the lower-cased arguments
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $haystackLower = self::strtolower($haystack);
    $needleLower = self::strtolower($needle);

    return self::strrpos($haystackLower, $needleLower, $offset);
  }
5453 9
5454 9
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked, for the last occurrence
   *                              of needle.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack. A non-negative
   *                              integer is converted to a character with chr()
   *                              before searching.
   *                              </p>
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point
   *                              prior to the end of the string.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack string. If haystack or needle is empty, or needle
   *                   is not found, it returns false.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    if (!isset($haystack[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // The integer test has to happen BEFORE the needle is cast to a string;
    // after the cast the needle can never be an integer again and this
    // conversion would be dead code.
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;
    if (!isset($needle[0])) {
      return false;
    }

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): this branch is guarded by the "iconv" flag but calls
    // grapheme_strrpos(), which is not an iconv function -- confirm which
    // capability should be checked here.
    if (self::$support['iconv'] === true) {
      return grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    if (($pos = strrpos($haystack, $needle)) !== false) {
      // translate the byte position into a character position
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5527
5528
  /**
   * Break a string into chunks of a given character length and join the
   * chunks with the given line ending.
   *
   * The chunks are produced by split() and glued together with implode(),
   * so no $end is appended after the last chunk.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
5542
5543
  /**
   * Alias for "UTF8::to_win1252()".
   *
   * @param   string $string The input, passed through unchanged to to_win1252().
   *
   * @return  array|string Whatever to_win1252() returns.
   */
  public static function to_iso8859($string)
  {
    $converted = self::to_win1252($string);

    return $converted;
  }
5554
5555
  /**
   * Fix UTF-8 strings that contain wrongly converted Windows-1252 characters.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as if it were ISO8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to fix_simple_utf8().
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $string The input string.
   *
   * @return  string The value returned by fix_simple_utf8().
   */
  public static function utf8_fix_win1252_chars($string)
  {
    $fixed = self::fix_simple_utf8($string);

    return $fixed;
  }
5572 1
5573
  /**
   * Get the list of Unicode white space characters known to this class.
   *
   * @return   array The static $whitespace table (numeric code point as key, white space character as value).
   */
  public static function ws()
  {
    $whitespaceTable = self::$whitespace;

    return $whitespaceTable;
  }
5582
5583 1
  /**
   * Parses the string into variables.
   *
   * WARNING: This differs from the one-argument form of parse_str(): the
   *    parsed variables are never placed in the local scope, they are
   *    always written to $result.
   *
   * The input is passed through filter() before it is handed to
   * mb_parse_str().
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string.
   *                        </p>
   * @param array  $result  <p>
   *                        Receives the parsed variables as array elements.
   *                        </p>
   *
   * @return bool The success flag reported by mb_parse_str().
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    $str = self::filter($str);

    // hand the success flag back instead of silently dropping it
    return mb_parse_str($str, $result);
  }
5610
5611
  /**
   * Get the writing direction of a specific character.
   *
   * The character is converted to its code point with chr_to_decimal() and
   * looked up in fixed tables of single code points and inclusive code point
   * ranges. Everything that is not listed is reported as 'LTR'.
   *
   * @param   string $chr Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($chr)
  {
    // tables for code points up to 0x85e
    static $lowerSingles = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
    );
    static $lowerRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
    );

    // tables for code points from 0xfb1d upwards
    static $upperSingles = array(
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );
    static $upperRanges = array(
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    $codePoint = static::chr_to_decimal($chr);

    // everything outside of [0x5be, 0x10b7f] is left-to-right
    if ($codePoint < 0x5be || $codePoint > 0x10b7f) {
      return 'LTR';
    }

    if ($codePoint <= 0x85e) {
      $singles = $lowerSingles;
      $ranges = $lowerRanges;
    } elseif ($codePoint === 0x200f) {
      return 'RTL';
    } elseif ($codePoint >= 0xfb1d) {
      $singles = $upperSingles;
      $ranges = $upperRanges;
    } else {
      return 'LTR';
    }

    // single code points are matched strictly (same type and value)
    if (in_array($codePoint, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $codePoint && $codePoint <= $range[1]) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
5710
5711 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the sequence (1 to 4);
   * the payload bits of the lead byte and of the following bytes are then
   * combined into one integer.
   *
   * @param   string $chr The input character
   *
   * @return  int The combined value. Lead bytes that match none of the
   *              recognised patterns are returned as the value of that
   *              single byte.
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    // Guard every offset read: accessing a missing string offset raises a
    // notice/warning. PHP evaluates such a read to '', so '' is substituted
    // explicitly to keep the computed value unchanged.
    $code = self::ord(isset($chr[0]) ? $chr[0] : '');
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      $byte = isset($chr[$i - 1]) ? $chr[$i - 1] : '';
      $code = ($code << 6) + (self::ord($byte) & ~0x80);
    }

    return $code;
  }
5750
5751
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity ("&#x...;") which is
   * then converted from "HTML-ENTITIES" to "UTF-8" by mb_convert_encoding().
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
5768
5769
  /**
   * Return an array that maps "urlencoded" Windows-1252 bytes to UTF-8 characters.
   *
   * The keys are the percent-encoded bytes "%20" to "%FF" (upper-case hex),
   * in ascending order; the values are the matching UTF-8 strings.
   *
   * NOTE(review): several entries (%7F, %81, %8D, %8F, %90, %9D, %A0, %AD)
   * appear as empty strings here. They may hold invisible characters in the
   * real file (e.g. DEL, NO-BREAK SPACE, SOFT HYPHEN) -- verify before
   * relying on them.
   *
   * @return array
   */
  protected static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (it was mapped to a backtick before)
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6005
6006
}
6007