Completed
Push — master ( 9e59ef...a54e7f )
by Lars
06:40
created

UTF8::substr_count()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 4.016

Importance

Changes 2
Bugs 1 Features 0
Metric Value
c 2
b 1
f 0
dl 0
loc 16
ccs 9
cts 10
cp 0.9
rs 9.2
cc 4
eloc 9
nc 4
nop 4
crap 4.016
1
<?php
2
3
namespace voku\helper;
4
5
use Patchwork\PHP\Shim\Intl;
6
use Patchwork\PHP\Shim\Normalizer;
7
use Patchwork\PHP\Shim\Xml;
8
9
/**
10
 * UTF8-Helper-Class
11
 *
12
 * @package voku\helper
13
 */
14
class UTF8
15
{
16
  /**
17
   * @var array
18
   */
19
  protected static $win1252ToUtf8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
  );
48
49
  /**
   * C1 control characters (as UTF-8) => the character Windows-1252 places at the same byte value.
   *
   * NOTE(review): in the copy this was restored from, every key was rendered identically to its value, which
   * turned the table into a no-op. The keys below are the UTF-8 encodings of U+0080 - U+009F, mirroring the
   * leading entries of $brokenUtf8ToUtf8 - presumably what the table originally held; verify against upstream.
   *
   * @var array
   */
  protected static $cp1252ToUtf8 = array(
      "\xc2\x80" => '€',
      "\xc2\x82" => '‚',
      "\xc2\x83" => 'ƒ',
      "\xc2\x84" => '„',
      "\xc2\x85" => '…',
      "\xc2\x86" => '†',
      "\xc2\x87" => '‡',
      "\xc2\x88" => 'ˆ',
      "\xc2\x89" => '‰',
      "\xc2\x8a" => 'Š',
      "\xc2\x8b" => '‹',
      "\xc2\x8c" => 'Œ',
      "\xc2\x8e" => 'Ž',
      "\xc2\x91" => '‘',
      "\xc2\x92" => '’',
      "\xc2\x93" => '“',
      "\xc2\x94" => '”',
      "\xc2\x95" => '•',
      "\xc2\x96" => '–',
      "\xc2\x97" => '—',
      "\xc2\x98" => '˜',
      "\xc2\x99" => '™',
      "\xc2\x9a" => 'š',
      "\xc2\x9b" => '›',
      "\xc2\x9c" => 'œ',
      "\xc2\x9e" => 'ž',
      "\xc2\x9f" => 'Ÿ',
  );
81
82
  /**
83
   * Numeric code point => UTF-8 Character
84
   *
85
   * @var array
86
   */
87
  protected static $whitespace = array(
88
      0     => "\x0",
89
      //NUL Byte
90
      9     => "\x9",
91
      //Tab
92
      10    => "\xa",
93
      //New Line
94
      11    => "\xb",
95
      //Vertical Tab
96
      13    => "\xd",
97
      //Carriage Return
98
      32    => "\x20",
99
      //Ordinary Space
100
      160   => "\xc2\xa0",
101
      //NO-BREAK SPACE
102
      5760  => "\xe1\x9a\x80",
103
      //OGHAM SPACE MARK
104
      6158  => "\xe1\xa0\x8e",
105
      //MONGOLIAN VOWEL SEPARATOR
106
      8192  => "\xe2\x80\x80",
107
      //EN QUAD
108
      8193  => "\xe2\x80\x81",
109
      //EM QUAD
110
      8194  => "\xe2\x80\x82",
111
      //EN SPACE
112
      8195  => "\xe2\x80\x83",
113
      //EM SPACE
114
      8196  => "\xe2\x80\x84",
115
      //THREE-PER-EM SPACE
116
      8197  => "\xe2\x80\x85",
117
      //FOUR-PER-EM SPACE
118
      8198  => "\xe2\x80\x86",
119
      //SIX-PER-EM SPACE
120
      8199  => "\xe2\x80\x87",
121
      //FIGURE SPACE
122
      8200  => "\xe2\x80\x88",
123
      //PUNCTUATION SPACE
124
      8201  => "\xe2\x80\x89",
125
      //THIN SPACE
126
      8202  => "\xe2\x80\x8a",
127
      //HAIR SPACE
128
      8232  => "\xe2\x80\xa8",
129
      //LINE SEPARATOR
130
      8233  => "\xe2\x80\xa9",
131
      //PARAGRAPH SEPARATOR
132
      8239  => "\xe2\x80\xaf",
133
      //NARROW NO-BREAK SPACE
134
      8287  => "\xe2\x81\x9f",
135
      //MEDIUM MATHEMATICAL SPACE
136
      12288 => "\xe3\x80\x80"
137
      //IDEOGRAPHIC SPACE
138
  );
139
140
  /**
141
   * @var array
142
   */
143
  protected static $whitespaceTable = array(
144
      'SPACE'                     => "\x20",
145
      'NO-BREAK SPACE'            => "\xc2\xa0",
146
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
147
      'EN QUAD'                   => "\xe2\x80\x80",
148
      'EM QUAD'                   => "\xe2\x80\x81",
149
      'EN SPACE'                  => "\xe2\x80\x82",
150
      'EM SPACE'                  => "\xe2\x80\x83",
151
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
152
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
153
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
154
      'FIGURE SPACE'              => "\xe2\x80\x87",
155
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
156
      'THIN SPACE'                => "\xe2\x80\x89",
157
      'HAIR SPACE'                => "\xe2\x80\x8a",
158
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
159
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
160
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
161
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  protected static $commonCaseFold = array(
168
      'ſ'            => 's',
169
      "\xCD\x85"     => 'ι',
170
      'ς'            => 'σ',
171
      "\xCF\x90"     => 'β',
172
      "\xCF\x91"     => 'θ',
173
      "\xCF\x95"     => 'φ',
174
      "\xCF\x96"     => 'π',
175
      "\xCF\xB0"     => 'κ',
176
      "\xCF\xB1"     => 'ρ',
177
      "\xCF\xB5"     => 'ε',
178
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
179
      "\xE1\xBE\xBE" => 'ι',
180
  );
181
182
  /**
183
   * @var array
184
   */
185
  protected static $brokenUtf8ToUtf8 = array(
186
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
187
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
188
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
189
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
190
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
191
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
192
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
193
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
194
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
195
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
196
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
197
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
198
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
199
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
200
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
201
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
202
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
203
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
204
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
205
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
206
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
207
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
208
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
209
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
210
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
211
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
212
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
213
      'ü'       => 'ü',
214
      'ä'       => 'ä',
215
      'ö'       => 'ö',
216
      'Ö'       => 'Ö',
217
      'ß'       => 'ß',
218
      'Ã '       => 'à',
219
      'á'       => 'á',
220
      'â'       => 'â',
221
      'ã'       => 'ã',
222
      'ù'       => 'ù',
223
      'ú'       => 'ú',
224
      'û'       => 'û',
225
      'Ù'       => 'Ù',
226
      'Ú'       => 'Ú',
227
      'Û'       => 'Û',
228
      'Ü'       => 'Ü',
229
      'ò'       => 'ò',
230
      'ó'       => 'ó',
231
      'ô'       => 'ô',
232
      'è'       => 'è',
233
      'é'       => 'é',
234
      'ê'       => 'ê',
235
      'ë'       => 'ë',
236
      'À'       => 'À',
237
      'Á'       => 'Á',
238
      'Â'       => 'Â',
239
      'Ã'       => 'Ã',
240
      'Ä'       => 'Ä',
241
      'Ã…'       => 'Å',
242
      'Ç'       => 'Ç',
243
      'È'       => 'È',
244
      'É'       => 'É',
245
      'Ê'       => 'Ê',
246
      'Ë'       => 'Ë',
247
      'ÃŒ'       => 'Ì',
248
      'Í'       => 'Í',
249
      'ÃŽ'       => 'Î',
250
      'Ï'       => 'Ï',
251
      'Ñ'       => 'Ñ',
252
      'Ã’'       => 'Ò',
253
      'Ó'       => 'Ó',
254
      'Ô'       => 'Ô',
255
      'Õ'       => 'Õ',
256
      'Ø'       => 'Ø',
257
      'Ã¥'       => 'å',
258
      'æ'       => 'æ',
259
      'ç'       => 'ç',
260
      'ì'       => 'ì',
261
      'í'       => 'í',
262
      'î'       => 'î',
263
      'ï'       => 'ï',
264
      'ð'       => 'ð',
265
      'ñ'       => 'ñ',
266
      'õ'       => 'õ',
267
      'ø'       => 'ø',
268
      'ý'       => 'ý',
269
      'ÿ'       => 'ÿ',
270
      '€'      => '€',
271
  );
272
273
  /**
274
   * @var array
275
   */
276
  protected static $utf8ToWin1252 = array(
277
      "\xe2\x82\xac" => "\x80", // EURO SIGN
278
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
279
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
280
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
281
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
282
      "\xe2\x80\xa0" => "\x86", // DAGGER
283
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
284
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
285
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
286
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
287
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
288
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
289
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
290
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
291
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
292
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
293
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
294
      "\xe2\x80\xa2" => "\x95", // BULLET
295
      "\xe2\x80\x93" => "\x96", // EN DASH
296
      "\xe2\x80\x94" => "\x97", // EM DASH
297
      "\xcb\x9c"     => "\x98", // SMALL TILDE
298
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
299
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
300
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
301
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
302
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
303
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
304
  );
305
306
  /**
307
   * @var array
308
   */
309
  protected static $utf8MSWord = array(
310
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
311
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
312
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
313
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
314
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
315
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
316
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
317
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
318
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
319
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
320
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
321
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
322
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
323
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
324
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
325
  );
326
327
  /**
328
   * @var array
329
   */
330
  private static $support = array();
331
332
  /**
   * Constructor.
   *
   * Only makes sure the feature detection in checkForSupport() has been triggered.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
339
340
  /**
   * Check for UTF-8 support and bootstrap the portability layer.
   *
   * The detected capabilities are stored in self::$support. The presence of the
   * "mbstring" entry doubles as the "already initialised" marker, so the detection
   * and the Bootup calls below are skipped on every later call.
   */
  public static function checkForSupport()
  {
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();

    Bootup::initAll(); // Enables the portability layer and configures PHP for UTF-8
    Bootup::filterRequestUri(); // Redirects to an UTF-8 encoded URL if it's not already the case
    Bootup::filterRequestInputs(); // Normalizes HTTP inputs to UTF-8 NFC
  }
357
358
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, its internal encoding is switched to UTF-8.
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    mb_internal_encoding('UTF-8');

    return true;
  }
373
374
  /**
   * Checks whether iconv is available on the server.
   *
   * @return   bool True if available, False otherwise
   */
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean
    return extension_loaded('iconv');
  }
383
384
  /**
   * Checks whether intl is available on the server.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean
    return extension_loaded('intl');
  }
393
394
  /**
   * Checks if the "/u" pattern modifier is available, which enables Unicode support in PCRE.
   *
   * Probes by matching an empty pattern compiled with "/u" against an empty string.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // a build without UTF-8 support raises a warning here, hence the silence operator
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return (bool)@preg_match('//u', '');
  }
404
405
  /**
   * Alias for "UTF8::to_ascii()".
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Passed through unchanged to to_ascii()
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    return self::to_ascii($s, $subst_chr);
  }
417
418
  /**
   * Convert to ASCII.
   *
   * The input is cast to string and cleaned via clean(). If no byte >= 0x80 remains
   * it is returned as is. Otherwise it is NFKC-normalized and every multi-byte
   * character is transliterated on its own, trying in order:
   *  1. iconv transliteration,
   *  2. the "translit_extra" data table,
   *  3. the first byte of the NFD decomposition if it is ASCII, else $subst_chr,
   *  4. str_transliterate(), whenever the result so far is still "?".
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Replacement used in step 3 when no ASCII base character is found
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // nothing outside of ASCII left: no transliteration needed
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = Normalizer::normalize($s, Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // the "s" modifier lets "." match line breaks as well, otherwise they
    // would be missing from the character list and vanish from the result
    preg_match_all('/./us', $s, $matches);
    $chars = $matches[0];

    foreach ($chars as &$c) {

      // single byte => already ASCII
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if (is_string($t)) {
          if (!isset($t[0])) {
            $t = '?';
          } elseif (isset($t[1])) {
            // drop leading quote/accent marks from a multi-character transliteration
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // a failed conversion is treated like an unknown character, so that
      // the fallbacks below get a chance instead of writing "false" into the result
      if ($t === false) {
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $decomposed = Normalizer::normalize($c, Normalizer::NFD);

          if (is_string($decomposed) && isset($decomposed[0]) && $decomposed[0] < "\x80") {
            // keep only the ASCII base character of the decomposition
            $t = $decomposed[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference, so later writes to $c cannot touch the last element
    unset($c);

    return implode('', $chars);
  }
497
498
  /**
   * Accepts a string and removes all non-UTF-8 characters from it.
   *
   * Well-formed sequences of one to four bytes are kept, every other byte is dropped.
   * Afterwards the replacement character (U+FFFD) and invisible control characters
   * are removed; the optional normalisations run in the order whitespace, MS Word
   * characters, BOM.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to strip a leading BOM
   * @param bool   $normalize_whitespace    set true, to replace special whitespace with a plain space
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // the pattern is matched byte-wise (no "u" modifier): runs of well-formed
    // sequences are captured and put back, any other byte is replaced by nothing
    $regx = '/
       (
        (?: [\x00-\x7F]                     # single-byte sequences    0xxxxxxx
        |   [\xC2-\xDF][\x80-\xBF]          # double-byte sequences    110xxxxx 10xxxxxx
        |   \xE0[\xA0-\xBF][\x80-\xBF]      # triple-byte sequences    1110xxxx 10xxxxxx * 2
        |   [\xE1-\xEC][\x80-\xBF]{2}
        |   \xED[\x80-\x9F][\x80-\xBF]
        |   [\xEE-\xEF][\x80-\xBF]{2}
        |   \xF0[\x90-\xBF][\x80-\xBF]{2}   # quadruple-byte sequences 11110xxx 10xxxxxx * 3
        |   [\xF1-\xF3][\x80-\xBF]{3}
        |   \xF4[\x80-\x8F][\x80-\xBF]{2}
        ){1,50}                             # ...1 to 50 times per match
       )
       | .                                  # anything else
       /x';
    $str = preg_replace($regx, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
545
546
  /**
   * Replace the diamond question mark (�), i.e. the Unicode replacement character.
   *
   * @param string $str
   * @param string $unknown Replacement for every occurrence (default is ?)
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the escaped byte sequence is U+FFFD; the literal glyph is kept next to it as in the original list
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // a scalar replacement is applied to every entry of the search list
    return str_replace($search, $unknown, $str);
  }
568
569
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * Removes every control character except horizontal tab (dec 09), newline (dec 10)
   * and carriage return (dec 13), plus DEL (dec 127). With $url_encoded enabled the
   * percent-encoded forms of the same characters are removed too, regardless of the
   * case of the hex digits.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded set false, to leave percent-encoded sequences untouched
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    $non_displayables = array();

    if ($url_encoded) {
      // "i" modifier: "%0B" has to be caught just like "%0b"
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until a pass removes nothing, because a removal can join
    // the surrounding text into a new match
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
602
603
  /**
   * Normalize the whitespace.
   *
   * Every character of self::$whitespaceTable is replaced by an ordinary space.
   *
   * @param string $str                  The string to be normalized.
   * @param bool   $keepNonBreakingSpace Set to true, to keep non-breaking-spaces.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false)
  {
    // search lists, cached per mode: 0 = replace everything, 1 = keep the NO-BREAK SPACE
    static $whitespaces = array();

    // only a strict "true" keeps the non-breaking space; deriving the cache slot
    // from that very check stops a loosely equal argument (e.g. 1) from filling
    // the slot of "true" with a list that still contains the non-breaking space
    $mode = ($keepNonBreakingSpace === true) ? 1 : 0;

    if (!isset($whitespaces[$mode])) {

      $table = self::$whitespaceTable;

      if ($mode === 1) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$mode] = array_values($table);
    }

    return str_replace($whitespaces[$mode], ' ', $str);
  }
629
630
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    return self::$whitespaceTable;
  }
644
645
  /**
   * Normalize MS Word special characters.
   *
   * Replaces every key of self::$utf8MSWord by its plain counterpart, e.g. "…" => "...".
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists are split off the map once and reused on later calls
    static $utf8MSWordKeys = null;
    static $utf8MSWordValues = null;

    if ($utf8MSWordKeys === null) {
      $utf8MSWordKeys = array_keys(self::$utf8MSWord);
      $utf8MSWordValues = array_values(self::$utf8MSWord);
    }

    return str_replace($utf8MSWordKeys, $utf8MSWordValues, $str);
  }
664
665
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * The marks are tested one after another against the (possibly already shortened)
   * beginning of the string, and every mark that matches is cut off.
   *
   * @param string $str
   *
   * @return string
   */
  public static function removeBOM($str = '')
  {
    // the order matters: the UTF-32 (LE) mark starts with the two bytes of
    // the UTF-16 (LE) mark and therefore has to be tested first
    $boms = array(
        "\x00\x00\xfe\xff", // UTF-32 (BE)
        "\xff\xfe\x00\x00", // UTF-32 (LE)
        "\xef\xbb\xbf", // UTF-8
        "\xfe\xff", // UTF-16 (BE)
        "\xff\xfe", // UTF-16 (LE)
    );

    foreach ($boms as $bom) {
      $bomLength = strlen($bom);

      if (strncmp($str, $bom, $bomLength) === 0) {
        // the cast keeps the result a string when nothing follows the BOM
        $str = (string)substr($str, $bomLength);
      }
    }

    return $str;
  }
712 2
713 2
  /**
   * Get data from "/data/*.ser".
   *
   * @param string $file Name of the data file, without directory and without the ".ser" extension
   *
   * @return bool|string|array|int false on error
   */
  protected static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.ser';

    if (!file_exists($file)) {
      return false;
    }

    // NOTE(review): unserialize() is only safe for trusted input - the name passed in
    // must never be derived from user input, since it selects the file that gets unserialized
    return unserialize(file_get_contents($file));
  }
729
730
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported from Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   *
   * The input is cleaned, split into single characters and every non-ASCII character is
   * replaced by the entry found in the matching lookup table ("bank") under "data/xNN.php".
   * Because the string is split into an array of characters first, memory use will increase
   * while the function runs.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    // lookup tables, cached per bank (code point >> 8) for the lifetime of the request
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    // guard against a missing match list (e.g. when the match itself failed)
    $chars = isset($ar[0]) ? $ar[0] : array();

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Decode the code point from the lead byte and its continuation bytes.
      // $ord is reset for every character so that a value decoded for a previous
      // character can never be reused by accident.
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // stray continuation byte (128-191) or invalid lead byte (254-255)
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // presumably the data file fills $UTF8_TO_ASCII[$bank] in this scope - verify against the data files
          /** @noinspection PhpIncludeInspection */
          include $bankfile;
        }

        // no data file, or the file did not define the bank: remember an empty table
        if (!isset($UTF8_TO_ASCII[$bank]) || !is_array($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
840
841
  /**
   * Print every entry of the native UTF8-Support list, one per line, e.g. for debugging.
   */
  public static function showSupport()
  {
    $output = '';

    // each entry is followed by a newline and an HTML line break
    foreach (self::$support as $supportEntry) {
      $output .= $supportEntry . "\n<br>";
    }

    echo $output;
  }
850
851
  /**
852
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
853
   *
854
   * @link http://php.net/manual/en/function.htmlentities.php
855
   *
856
   * @param string $string        <p>
857
   *                              The input string.
858
   *                              </p>
859
   * @param int    $flags         [optional] <p>
860
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
861
   *                              invalid code unit sequences and the used document type. The default is
862
   *                              ENT_COMPAT | ENT_HTML401.
863
   *                              <table>
864
   *                              Available <i>flags</i> constants
865
   *                              <tr valign="top">
866
   *                              <td>Constant Name</td>
867
   *                              <td>Description</td>
868
   *                              </tr>
869
   *                              <tr valign="top">
870
   *                              <td><b>ENT_COMPAT</b></td>
871
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
872
   *                              </tr>
873
   *                              <tr valign="top">
874
   *                              <td><b>ENT_QUOTES</b></td>
875
   *                              <td>Will convert both double and single quotes.</td>
876
   *                              </tr>
877
   *                              <tr valign="top">
878
   *                              <td><b>ENT_NOQUOTES</b></td>
879
   *                              <td>Will leave both double and single quotes unconverted.</td>
880
   *                              </tr>
881
   *                              <tr valign="top">
882
   *                              <td><b>ENT_IGNORE</b></td>
883
   *                              <td>
884
   *                              Silently discard invalid code unit sequences instead of returning
885
   *                              an empty string. Using this flag is discouraged as it
886
   *                              may have security implications.
887
   *                              </td>
888
   *                              </tr>
889
   *                              <tr valign="top">
890
   *                              <td><b>ENT_SUBSTITUTE</b></td>
891
   *                              <td>
892
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
893
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
894
   *                              </td>
895
   *                              </tr>
896
   *                              <tr valign="top">
897
   *                              <td><b>ENT_DISALLOWED</b></td>
898
   *                              <td>
899
   *                              Replace invalid code points for the given document type with a
900
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
901
   *                              (otherwise) instead of leaving them as is. This may be useful, for
902
   *                              instance, to ensure the well-formedness of XML documents with
903
   *                              embedded external content.
904
   *                              </td>
905
   *                              </tr>
906
   *                              <tr valign="top">
907
   *                              <td><b>ENT_HTML401</b></td>
908
   *                              <td>
909
   *                              Handle code as HTML 4.01.
910
   *                              </td>
911
   *                              </tr>
912
   *                              <tr valign="top">
913
   *                              <td><b>ENT_XML1</b></td>
914
   *                              <td>
915
   *                              Handle code as XML 1.
916
   *                              </td>
917
   *                              </tr>
918
   *                              <tr valign="top">
919
   *                              <td><b>ENT_XHTML</b></td>
920
   *                              <td>
921
   *                              Handle code as XHTML.
922
   *                              </td>
923
   *                              </tr>
924
   *                              <tr valign="top">
925
   *                              <td><b>ENT_HTML5</b></td>
926
   *                              <td>
927
   *                              Handle code as HTML 5.
928
   *                              </td>
929
   *                              </tr>
930
   *                              </table>
931
   *                              </p>
932
   * @param string $encoding      [optional] <p>
933
   *                              Like <b>htmlspecialchars</b>,
934
   *                              <b>htmlentities</b> takes an optional third argument
935
   *                              <i>encoding</i> which defines encoding used in
936
   *                              conversion.
937
   *                              Although this argument is technically optional, you are highly
938
   *                              encouraged to specify the correct value for your code.
939
   *                              </p>
940
   * @param bool   $double_encode [optional] <p>
941
   *                              When <i>double_encode</i> is turned off PHP will not
942
   *                              encode existing html entities. The default is to convert everything.
943
   *                              </p>
944 2
   *
945
   *
946 2
   * @return string the encoded string.
947
   * </p>
948
   * <p>
949
   * If the input <i>string</i> contains an invalid code unit
950
   * sequence within the given <i>encoding</i> an empty string
951
   * will be returned, unless either the <b>ENT_IGNORE</b> or
952
   * <b>ENT_SUBSTITUTE</b> flags are set.
953
   */
954
  public static function htmlentities($string, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // plain delegation to the native function, with UTF-8 as this wrapper's default encoding
    $encoded = htmlentities($string, $flags, $encoding, $double_encode);

    return $encoded;
  }
958
959
  /**
960
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
961
   *
962
   * @link http://php.net/manual/en/function.htmlspecialchars.php
963
   *
964
   * @param string $string        <p>
965
   *                              The string being converted.
966
   *                              </p>
967
   * @param int    $flags         [optional] <p>
968
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
969
   *                              invalid code unit sequences and the used document type. The default is
970
   *                              ENT_COMPAT | ENT_HTML401.
971
   *                              <table>
972
   *                              Available <i>flags</i> constants
973
   *                              <tr valign="top">
974
   *                              <td>Constant Name</td>
975
   *                              <td>Description</td>
976
   *                              </tr>
977
   *                              <tr valign="top">
978
   *                              <td><b>ENT_COMPAT</b></td>
979
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
980
   *                              </tr>
981
   *                              <tr valign="top">
982
   *                              <td><b>ENT_QUOTES</b></td>
983
   *                              <td>Will convert both double and single quotes.</td>
984
   *                              </tr>
985
   *                              <tr valign="top">
986
   *                              <td><b>ENT_NOQUOTES</b></td>
987
   *                              <td>Will leave both double and single quotes unconverted.</td>
988
   *                              </tr>
989
   *                              <tr valign="top">
990
   *                              <td><b>ENT_IGNORE</b></td>
991
   *                              <td>
992
   *                              Silently discard invalid code unit sequences instead of returning
993
   *                              an empty string. Using this flag is discouraged as it
994
   *                              may have security implications.
995
   *                              </td>
996
   *                              </tr>
997
   *                              <tr valign="top">
998
   *                              <td><b>ENT_SUBSTITUTE</b></td>
999
   *                              <td>
1000
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
1001
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
1002
   *                              </td>
1003
   *                              </tr>
1004
   *                              <tr valign="top">
1005
   *                              <td><b>ENT_DISALLOWED</b></td>
1006
   *                              <td>
1007
   *                              Replace invalid code points for the given document type with a
1008
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
1009
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1010
   *                              instance, to ensure the well-formedness of XML documents with
1011
   *                              embedded external content.
1012
   *                              </td>
1013
   *                              </tr>
1014
   *                              <tr valign="top">
1015
   *                              <td><b>ENT_HTML401</b></td>
1016
   *                              <td>
1017
   *                              Handle code as HTML 4.01.
1018
   *                              </td>
1019
   *                              </tr>
1020
   *                              <tr valign="top">
1021
   *                              <td><b>ENT_XML1</b></td>
1022
   *                              <td>
1023
   *                              Handle code as XML 1.
1024
   *                              </td>
1025
   *                              </tr>
1026
   *                              <tr valign="top">
1027
   *                              <td><b>ENT_XHTML</b></td>
1028
   *                              <td>
1029
   *                              Handle code as XHTML.
1030
   *                              </td>
1031
   *                              </tr>
1032
   *                              <tr valign="top">
1033
   *                              <td><b>ENT_HTML5</b></td>
1034
   *                              <td>
1035
   *                              Handle code as HTML 5.
1036
   *                              </td>
1037
   *                              </tr>
1038
   *                              </table>
1039
   *                              </p>
1040
   * @param string $encoding      [optional] <p>
1041
   *                              Defines encoding used in conversion.
1042
   *                              </p>
1043
   *                              <p>
1044
   *                              For the purposes of this function, the encodings
1045
   *                              ISO-8859-1, ISO-8859-15,
1046
   *                              UTF-8, cp866,
1047
   *                              cp1251, cp1252, and
1048
   *                              KOI8-R are effectively equivalent, provided the
1049
   *                              <i>string</i> itself is valid for the encoding, as
1050
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1051
   *                              the same positions in all of these encodings.
1052
   *                              </p>
1053
   * @param bool   $double_encode [optional] <p>
1054
   *                              When <i>double_encode</i> is turned off PHP will not
1055
   *                              encode existing html entities, the default is to convert everything.
1056
   *                              </p>
1057
   *
1058 1
   * @return string The converted string.
1059
   * </p>
1060 1
   * <p>
1061
   * If the input <i>string</i> contains an invalid code unit
1062
   * sequence within the given <i>encoding</i> an empty string
1063
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1064
   * <b>ENT_SUBSTITUTE</b> flags are set.
1065
   */
1066
  public static function htmlspecialchars($string, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // plain delegation to the native function, with UTF-8 as this wrapper's default encoding
    $converted = htmlspecialchars($string, $flags, $encoding, $double_encode);

    return $converted;
  }
1070 16
1071
  /**
   * Alias of "UTF8::is_utf8": forwards the string and returns that method's result.
   *
   * @param string $str the string to be checked
   *
   * @return bool
   */
  public static function isUtf8($str)
  {
    $isValid = self::is_utf8($str);

    return $isValid;
  }
1082
1083
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * An empty string is reported as valid. Overlong (non-shortest) forms, surrogate code points,
   * code points above U+10FFFF, 5- and 6-octet sequences and sequences that are cut off at the
   * end of the string are reported as invalid.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param    string $str The string to be checked.
   *
   * @return   bool true if the whole string is well-formed UTF-8, false otherwise
   */
  public static function is_utf8($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    // NOTE(review): pcre_utf8_support() is defined outside this method; it is assumed to
    // return true when the /u modifier is usable - confirm against its implementation.
    if (self::pcre_utf8_support() === true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    // Fallback without PCRE UTF-8 support: walk the bytes with a small state machine.

    $mState = 0; // expected number of continuation octets before the next character starts
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // expected number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or the
        // first octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } else {
          // Either a 5/6 octet lead byte (always illegal: the code point would be
          // non-shortest form or outside 0-0x10FFFF), a stray continuation octet,
          // or 0xFE/0xFF which never appear in UTF-8.
          return false;
        }

        continue;
      }

      // When mState is non-zero, we expect a continuation of the multi-octet sequence.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation: merge its 6 payload bits at the right position.
      $mUcs4 |= ($in & 0x3F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // End of the multi-octet sequence, mUcs4 now holds the final code point:
        // check for illegal sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            ((2 === $mBytes) && ($mUcs4 < 0x0080)) ||
            ((3 === $mBytes) && ($mUcs4 < 0x0800)) ||
            ((4 === $mBytes) && ($mUcs4 < 0x10000)) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // reset for the next character
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // a pending state means the string ended in the middle of a multi-octet sequence
    return $mState === 0;
  }
1216
1217
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $s     the string to examine
   * @param string $mask  the list of allowed characters
   * @param int    $start position at which to start examining; only applied when non-zero
   * @param int    $len   length of the segment to examine; only applied when not 2147483647
   *
   * @return int length of the matching initial segment, 0 if the first character is not in the mask
   */
  public static function strspn($s, $mask, $start = 0, $len = 2147483647)
  {
    // only cut the string when the caller actually asked for a sub-range
    $needsSlice = $start || 2147483647 != $len;
    if ($needsSlice) {
      $s = self::substr($s, $start, $len);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $s, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
1236
1237
  /**
   * Get part of a string.
   *
   * The first available backend is used: mbstring, then intl (grapheme functions), then iconv,
   * and finally a pure PHP fallback that splits the string into characters.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>
   *                           The string being checked.
   *                           </p>
   * @param int     $start     <p>
   *                           The first position used in str.
   *                           </p>
   * @param int     $length    [optional] <p>
   *                           The maximum length of the returned string.
   *                           If null, the length of the whole string is used.
   *                           </p>
   * @param string  $encoding  Charset passed to the mbstring / iconv backend;
   *                           boolean values are treated as 'UTF-8' (fallback for old versions)
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string the portion of str specified by the start and length parameters,
   *                an empty string for empty input
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // cached result of the probe for the grapheme_substr() bug #62759
    static $bug62759;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr

      $str = self::clean($str);
    }

    $start = (int)$start;

    if ($length === null) {
      $length = (int)self::strlen($str);
    } else {
      $length = (int)$length;
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    if (self::$support['mbstring'] === true) {
      return mb_substr($str, $start, $length, $encoding);
    }

    // the grapheme_* functions belong to the intl extension, so they are gated on
    // intl support (as str_split() does) and not on iconv support
    if (self::$support['intl'] === true) {

      if (!isset($bug62759)) {
        $bug62759 = ('à' === grapheme_substr('éà', 1, -2));
      }

      if ($bug62759) {
        return (string)Intl::grapheme_substr_workaround62759($str, $start, $length);
      } else {
        return (string)grapheme_substr($str, $start, $length);
      }
    }

    if (self::$support['iconv'] === true) {
      // iconv_substr() may return false, hence the cast
      return (string)iconv_substr($str, $start, $length, $encoding);
    }

    // fallback

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode(array_slice($array, $start, $length));
  }
1314
1315
  /**
   * Get the string length in characters, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $string    The string being checked for length.
   * @param string  $encoding  Charset handed to mb_strlen(); boolean values are treated as 'UTF-8'
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied for 'UTF-8')
   *
   * @return int the number of characters in the string for the given encoding,
   *             a multi-byte character is counted as 1; 0 for an empty string
   */
  public static function strlen($string, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $string = (string)$string;

    if ($string === '') {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    $subject = ($encoding === 'UTF-8' && $cleanUtf8 === true) ? self::clean($string) : $string;

    return mb_strlen($subject, $encoding);
  }
1353 24
1354
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the characters are collected with a regular expression,
   * otherwise the bytes are scanned manually and malformed sequences are skipped.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string (only used on the PCRE path).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // init
    self::checkForSupport();
    $ret = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: decode the sequence width from the lead byte

      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = ord($str[$pos]);

        // single byte (US-ASCII)
        if (($lead & 0x80) === 0x00) {
          $ret[] = $str[$pos];
          continue;
        }

        if (($lead & 0xE0) === 0xC0) {
          $width = 2;
        } elseif (($lead & 0xF0) === 0xE0) {
          $width = 3;
        } elseif (($lead & 0xF8) === 0xF0) {
          $width = 4;
        } else {
          // not a supported lead byte: skip it
          continue;
        }

        // sequence would run past the end of the string: skip the lead byte
        if (!isset($str[$pos + $width - 1])) {
          continue;
        }

        // collect the continuation bytes, give up on the first malformed one
        $sequence = $str[$pos];
        for ($offset = 1; $offset < $width; $offset++) {
          $next = $str[$pos + $offset];
          if ((ord($next) & 0xC0) !== 0x80) {
            $sequence = null;
            break;
          }
          $sequence .= $next;
        }

        if ($sequence !== null) {
          $ret[] = $sequence;

          $pos += $width - 1;
        }
      }
    }

    if ($length > 1) {
      // group the characters and glue every group back into one string
      $ret = array_map('implode', array_chunk($ret, $length));
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
1432 40
1433
  /**
1434 40
   * rxClass
1435
   *
1436 40
   * @param string $s
1437 30
   * @param string $class
1438
   *
1439
   * @return string
1440 16
   */
1441
  protected static function rxClass($s, $class = '')
  {
    // Results are cached per ($s, $class) pair. The two arguments are kept as
    // separate key levels: a concatenated key would let e.g. ('ab', 'c') and
    // ('a', 'bc') share one entry although they can yield different patterns.
    static $rxClassCache = array();

    $charsKey = (string)$s;
    $prefixKey = (string)$class;

    if (isset($rxClassCache[$charsKey][$prefixKey])) {
      return $rxClassCache[$charsKey][$prefixKey];
    }

    // $parts[0] collects what goes inside the bracket expression (seeded with
    // the caller supplied $class); further entries are items that are matched
    // as separate alternatives next to the bracket expression.
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // the hyphen is placed first (presumably so it cannot form a range)
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes long: quote it in case it is a regex meta character
        $parts[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // longer byte sequence that self::strlen() counts as one character
        $parts[0] .= $char;
      } else {
        // anything else cannot live inside a bracket expression
        $parts[] = $char;
      }
    }

    $parts[0] = '[' . $parts[0] . ']';

    if (1 === count($parts)) {
      $return = $parts[0];
    } else {
      $return = '(?:' . implode('|', $parts) . ')';
    }

    $rxClassCache[$charsKey][$prefixKey] = $return;

    return $return;
  }
1477
1478 17
  /**
1479
   * Convert a string to an array.
1480 17
   *
1481
   * @param string $string
1482
   * @param int    $len
1483
   *
1484
   * @return array
1485
   */
1486 17
  public static function str_split($string, $len = 1)
  {
    // init
    self::checkForSupport();

    // chunk sizes below 1 are delegated to the native str_split()
    $len = (int)$len;
    if ($len < 1) {
      return str_split($string, func_get_arg(1));
    }

    // step 1: cut the input into single graphemes
    if (self::$support['intl'] === true) {
      $graphemes = array();
      $offset = 0;
      $byteLength = strlen($string);
      while ($offset < $byteLength) {
        // $offset is both the start position and the by-reference "next" position
        $graphemes[] = grapheme_extract($string, 1, GRAPHEME_EXTR_COUNT, $offset, $offset);
      }
    } else {
      preg_match_all('/' . GRAPHEME_CLUSTER_RX . '/u', $string, $matches);
      $graphemes = $matches[0];
    }

    if (1 === $len) {
      return $graphemes;
    }

    // step 2: glue $len consecutive graphemes together per output element
    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
1527 1
1528
  /**
1529 1
   * Return the width of a string.
1530
   *
1531
   * @param string $s
1532
   *
1533
   * @return int
1534
   */
1535
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    // width as reported by mbstring for UTF-8 input
    $width = mb_strwidth($s, 'UTF-8');

    return $width;
  }
1542 5
1543
  /**
1544 5
   * Limit the number of words in a string.
1545
   *
1546
   * @param  string $str
1547
   * @param  int    $words
1548 5
   * @param  string $strAddOn
1549
   *
1550
   * @return string
1551 5
   */
1552
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;

    // A limit below 1 would build the quantifier "{1,0}", which PCRE rejects
    // (compile warning, and the whole input would be returned). Zero words
    // means an empty result.
    if ($words < 1) {
      return '';
    }

    // leading whitespace plus up to $words runs of non-whitespace characters,
    // each followed by its trailing whitespace
    preg_match('/^\s*+(?:\S++\s*+){1,' . $words . '}/u', $str, $matches);

    // no match, or the match already covers the whole input: nothing to cut
    if (
        !isset($matches[0])
        ||
        self::strlen($str) === self::strlen($matches[0])
    ) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
1572 1
1573
  /**
1574 1
   * Limit the number of characters in a string.
1575
   *
1576
   * @param  string $str
1577
   * @param  int    $length
1578
   * @param  string $strAddOn
1579
   *
1580
   * @return string
1581
   */
1582
  public static function str_limit($str, $length = 100, $strAddOn = '...')
  {
    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // short enough already: hand the input back unchanged
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the last character inside the limit is a space: cut right before it
    $lastKeptChar = self::substr($str, $length - 1, 1);
    if ($lastKeptChar === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // otherwise cut at the limit and drop the last space-separated piece
    $truncated = self::substr($str, 0, $length);
    $pieces = explode(' ', $truncated);
    array_pop($pieces);
    $withoutLastPiece = implode(' ', $pieces);

    if ('' === $withoutLastPiece) {
      // nothing is left after dropping the last piece: hard cut instead
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastPiece . $strAddOn;
  }
1611
1612
  /**
1613
   * Find length of initial segment not matching mask.
1614
   *
1615
   * @param string $str
1616
   * @param string $charlist
1617
   * @param int    $start
1618
   * @param int    $len
1619
   *
1620
   * @return int|null
1621 2
   */
1622
  public static function strcspn($str, $charlist, $start = 0, $len = 2147483647)
  {
    // force a string; an empty mask gives null
    $charlist .= '';
    if ('' === $charlist) {
      return null;
    }

    // only slice when a start or a non-default length was requested
    $needsSlice = ($start || 2147483647 != $len);
    $str = $needsSlice ? (string)self::substr($str, $start, $len) : (string)$str;

    // lazily capture everything in front of the first character from the mask
    $pattern = '/^(.*?)' . self::rxClass($charlist) . '/us';
    if (preg_match($pattern, $str, $found)) {
      return self::strlen($found[1]);
    }

    // no masked character present: the whole (sliced) string counts
    return self::strlen($str);
  }
1641
1642
  /**
1643
   * Checks if the number of Unicode characters in a string are not
1644
   * more than the specified integer.
1645
   *
1646
   * @param    string $str      The original string to be checked.
1647
   * @param    int    $box_size The size in number of chars to be checked against string.
1648 10
   *
1649
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
1650 10
   */
1651 10
  public static function fits_inside($str, $box_size)
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1655 1
1656 1
  /**
1657 1
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
1658
   *
1659 10
   * @param string $string
1660
   * @param string $needle
1661 10
   * @param bool   $before_needle
1662
   *
1663 10
   * @return false|string
1664 1
   */
1665 1
  public static function stristr($string, $needle, $before_needle = false)
  {
    // force a string; an empty needle gives false
    $needle .= '';
    if ('' === $needle) {
      return false;
    }

    // init
    self::checkForSupport();

    $result = mb_stristr($string, $needle, $before_needle, 'UTF-8');

    return $result;
  }
1676
1677
  /**
1678
   * Case insensitive string comparisons using a "natural order" algorithm.
1679
   *
1680
   * @param string $str1
1681
   * @param string $str2
1682
   *
1683
   * @return int Similar to other string comparison functions, this one returns < 0 if str1 is less than str2; > 0 if
1684
   *             str1 is greater than str2, and 0 if they are equal.
1685
   */
1686
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both operands, then reuse the case sensitive comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
1690
1691 19
  /**
1692 5
   * String comparisons using a "natural order" algorithm.
1693
   *
1694
   * @param string $str1
1695
   * @param string $str2
1696 17
   *
1697
   * @return int Similar to other string comparison functions, this one returns < 0 if str1 is less than str2; > 0 if
1698 17
   *             str1 is greater than str2, and 0 if they are equal.
1699
   */
1700
  public static function strnatcmp($str1, $str2)
  {
    // identical strings need no folding at all
    if (($str1 . '') === ($str2 . '')) {
      return 0;
    }

    $natural1 = self::strtonatfold($str1);
    $natural2 = self::strtonatfold($str2);

    return strnatcmp($natural1, $natural2);
  }
1704
1705
  /**
1706
   * Generic case sensitive transformation for collation matching.
1707
   *
1708
   * @param string $s
1709
   *
1710 1
   * @return string
1711
   */
1712 1
  protected static function strtonatfold($s)
  {
    // decompose to NFD, then remove every run of \p{Mn} characters
    $decomposed = Normalizer::normalize($s, Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
1716
1717
  /**
1718 1
   * Unicode transformation for case-less matching.
1719
   *
1720 1
   * @link http://unicode.org/reports/tr21/tr21-5.html
1721
   *
1722 1
   * @param string $string
1723 1
   * @param bool   $full
1724 1
   *
1725 1
   * @return string
1726
   */
1727 1
  public static function strtocasefold($string, $full = true)
  {
    // lookup tables are built once and kept for later calls
    static $commonSearch = null;
    static $commonReplace = null;
    static $fullTable = null;

    if ($commonSearch === null) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonSearch, $commonReplace, $string);

    if ($full) {
      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      // index 0 is used as the search list, index 1 as the replacement list
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    return self::strtolower($folded);
  }
1752 8
1753 1
  /**
1754 1
   * (PHP 4 &gt;= 4.3.0, PHP 5)<br/>
1755 1
   * Make a string lowercase.
1756
   *
1757 8
   * @link http://php.net/manual/en/function.mb-strtolower.php
1758
   *
1759
   * @param string $str <p>
1760
   *                    The string being lowercased.
1761
   *                    </p>
1762
   * @param string $encoding
1763
   *
1764
   * @return string str with all alphabetic characters converted to lowercase.
1765
   */
1766
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to convert
    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    $lowered = mb_strtolower($str, $encoding);

    return $lowered;
  }
1779
1780
  /**
1781
   * Multi decode html entity & fix urlencoded-win1252-chars.
1782
   *
1783
   * e.g:
1784
   * 'D&#252;sseldorf'               => 'Düsseldorf'
1785
   * 'D%FCsseldorf'                  => 'Düsseldorf'
1786
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
1787
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
1788
   * 'Düsseldorf'                   => 'Düsseldorf'
1789
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
1790
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
1791
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
1792
   *
1793
   * @param string $str
1794
   *
1795
   * @return string
1796
   */
1797
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // url-decode first, then rewrite "%uXXXX" escapes as hexadecimal entities
    $decoded = urldecode($str);
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $decoded);

    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // pipeline: to UTF-8 -> decode entities -> raw url-decode -> fix_simple_utf8()
    $asUtf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($asUtf8, $flags);
    $repaired = self::fix_simple_utf8(rawurldecode($withoutEntities));

    return (string)$repaired;
  }
1820
1821
  /**
1822
   * Fixing a broken UTF-8 string.
1823
   *
1824
   * @param string $str
1825
   *
1826
   * @return string
1827
   */
1828
  public static function fix_simple_utf8($str)
  {
    // search / replace lists, derived once from self::$brokenUtf8ToUtf8
    static $searchList = null;
    static $replaceList = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if ($searchList === null) {
      $searchList = array_keys(self::$brokenUtf8ToUtf8);
      $replaceList = array_values(self::$brokenUtf8ToUtf8);
    }

    $fixed = str_replace($searchList, $replaceList, $str);

    return $fixed;
  }
1846
1847
  /**
1848 2
   * UTF-8 version of html_entity_decode()
1849
   *
1850
   * The reason we are not using html_entity_decode() by itself is because
1851 14
   * while it is not technically correct to leave out the semicolon
1852
   * at the end of an entity most browsers will still interpret the entity
1853
   * correctly. html_entity_decode() does not convert entities without
1854 14
   * semicolons, so we are left with our own little solution here. Bummer.
1855 14
   *
1856 14
   * Convert all HTML entities to their applicable characters
1857
   *
1858 14
   * @link http://php.net/manual/en/function.html-entity-decode.php
1859 14
   *
1860
   * @param string $string   <p>
1861 14
   *                         The input string.
1862
   *                         </p>
1863
   * @param int    $flags    [optional] <p>
1864
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
1865
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
1866
   *                         <table>
1867
   *                         Available <i>flags</i> constants
1868
   *                         <tr valign="top">
1869
   *                         <td>Constant Name</td>
1870
   *                         <td>Description</td>
1871
   *                         </tr>
1872
   *                         <tr valign="top">
1873
   *                         <td><b>ENT_COMPAT</b></td>
1874
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
1875
   *                         </tr>
1876
   *                         <tr valign="top">
1877
   *                         <td><b>ENT_QUOTES</b></td>
1878
   *                         <td>Will convert both double and single quotes.</td>
1879
   *                         </tr>
1880
   *                         <tr valign="top">
1881
   *                         <td><b>ENT_NOQUOTES</b></td>
1882
   *                         <td>Will leave both double and single quotes unconverted.</td>
1883
   *                         </tr>
1884
   *                         <tr valign="top">
1885
   *                         <td><b>ENT_HTML401</b></td>
1886
   *                         <td>
1887
   *                         Handle code as HTML 4.01.
1888
   *                         </td>
1889
   *                         </tr>
1890 20
   *                         <tr valign="top">
1891
   *                         <td><b>ENT_XML1</b></td>
1892 20
   *                         <td>
1893 2
   *                         Handle code as XML 1.
1894
   *                         </td>
1895 2
   *                         </tr>
1896 2
   *                         <tr valign="top">
1897
   *                         <td><b>ENT_XHTML</b></td>
1898 2
   *                         <td>
1899
   *                         Handle code as XHTML.
1900
   *                         </td>
1901 20
   *                         </tr>
1902
   *                         <tr valign="top">
1903 20
   *                         <td><b>ENT_HTML5</b></td>
1904 9
   *                         <td>
1905
   *                         Handle code as HTML 5.
1906
   *                         </td>
1907 20
   *                         </tr>
1908
   *                         </table>
1909 20
   *                         </p>
1910 20
   * @param string $encoding [optional] <p>
1911 20
   *                         Encoding to use.
1912
   *                         </p>
1913 20
   *
1914 20
   * @return string the decoded string.
1915 20
   */
1916 20
  public static function html_entity_decode($string, $flags = null, $encoding = 'UTF-8')
  {
    $string = (string)$string;

    // empty input, or no "&" at all: there is nothing to decode
    if ('' === $string || strpos($string, '&') === false) {
      return $string;
    }

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? ENT_COMPAT | ENT_HTML5 : ENT_COMPAT;
    }

    // numeric entities (hex or decimal) that are not followed by ";"
    $unterminatedEntity = '/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS';

    // keep decoding until a pass leaves the string unchanged
    do {
      $before = $string;

      // add the missing ";" first, then let the native function decode
      $terminated = preg_replace($unterminatedEntity, '$1;', $string);
      $string = html_entity_decode($terminated, $flags, $encoding);
    } while ($before !== $string);

    return $string;
  }
1949
1950
  /**
1951 7
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
1952 6
   *
1953 6
   * - It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
1954 6
   *
1955
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
1956
   *
1957 20
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞß
1958
   *    are followed by any of these:  ("group B")
1959 2
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
1960 2
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
1961
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
1962
   * is also a valid unicode character, and will be left unchanged.
1963 2
   *
1964 2
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
1965 2
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
1966
   *
1967
   * @param string $string Any string or array.
1968 2
   *
1969 18
   * @return string The same string, but UTF8 encoded.
1970
   */
1971 20
  public static function to_utf8($string)
  {
    // arrays are converted element by element (recursively) and returned as array
    if (is_array($string)) {
      foreach ($string as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $string[$k] = self::to_utf8($v);
      }

      return $string;
    }

    $string = (string)$string;

    if (!isset($string[0])) {
      return $string;
    }

    // Re-encode one byte >= 0x80, read as a single ISO-8859-1 character, as
    // its two-byte UTF-8 sequence. Integer shifts are used on purpose:
    // "ord() / 64" yields a float, and handing a fractional float to chr() is
    // a deprecated implicit conversion on PHP 8.1+.
    $latin1ToUtf8 = function ($byte) {
      $ord = ord($byte);

      return chr(($ord >> 6) | 0xc0) . chr(($ord & 0x3f) | 0x80);
    };

    // number of bytes, not characters
    $max = self::strlen($string, '8bit');

    $buf = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $string[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already
        // look-ahead bytes; "\x00" stands in for "past the end of the input"
        $c2 = $i + 1 >= $max ? "\x00" : $string[$i + 1];
        $c3 = $i + 2 >= $max ? "\x00" : $string[$i + 2];
        $c4 = $i + 3 >= $max ? "\x00" : $string[$i + 3];

        if ($c1 >= "\xc0" && $c1 <= "\xdf") { // looks like 2 bytes UTF8

          if ($c2 >= "\x80" && $c2 <= "\xbf") { // yeah, almost sure it's UTF8 already
            $buf .= $c1 . $c2;
            $i++;
          } else { // not valid UTF8 - convert it
            $buf .= $latin1ToUtf8($c1);
          }

        } elseif ($c1 >= "\xe0" && $c1 <= "\xef") { // looks like 3 bytes UTF8

          if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf") { // yeah, almost sure it's UTF8 already
            $buf .= $c1 . $c2 . $c3;
            $i += 2;
          } else { // not valid UTF8 - convert it
            $buf .= $latin1ToUtf8($c1);
          }

        } elseif ($c1 >= "\xf0" && $c1 <= "\xf7") { // looks like 4 bytes UTF8

          if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf") { // yeah, almost sure it's UTF8 already
            $buf .= $c1 . $c2 . $c3 . $c4;
            $i += 3;
          } else { // not valid UTF8 - convert it
            $buf .= $latin1ToUtf8($c1);
          }

        } else { // doesn't look like UTF8, but should be converted
          $buf .= $latin1ToUtf8($c1);
        }

      } elseif (($c1 & "\xc0") === "\x80") { // 0x80-0xbf outside of a sequence: needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= $latin1ToUtf8($c1);
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
2077
2078
  /**
2079
   * alias for "UTF8::to_utf8"
2080
   *
2081
   * @param string $string
2082
   *
2083
   * @return string
2084
   */
2085
  public static function toUTF8($string)
  {
    // plain delegation to the snake_case implementation
    $converted = self::to_utf8($string);

    return $converted;
  }
2089
2090
  /**
2091
   * Try to check if a string is a json-string...
2092
   *
2093
   * @param $string
2094
   *
2095
   * @return bool
2096
   *
2097
   * @deprecated
2098
   */
2099
  public static function isJson($string)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return false;
    }

    $decoded = json_decode($string);

    // Only structured documents count as JSON here: a top-level object or a
    // top-level array (json_decode() hands the latter back as a PHP array, so
    // an is_object() check alone wrongly rejects e.g. "[1,2]"). Bare scalars
    // such as "1" or "true" are still reported as "not JSON".
    $isStructured = is_object($decoded) || is_array($decoded);

    return $isStructured && json_last_error() === JSON_ERROR_NONE;
  }
2117
2118
  /**
2119
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
2120
   *
2121
   * @link http://php.net/manual/en/function.grapheme-strstr.php
2122
   *
2123
   * @param string $haystack      <p>
2124
   *                              The input string. Must be valid UTF-8.
2125
   *                              </p>
2126
   * @param string $needle        <p>
2127
   *                              The string to look for. Must be valid UTF-8.
2128
   *                              </p>
2129
   * @param bool   $before_needle [optional] <p>
2130
   *                              If <b>TRUE</b>, grapheme_strstr() returns the part of the
2131
   *                              haystack before the first occurrence of the needle (excluding the needle).
2132
   *                              </p>
2133
   *
2134
   * @return string the portion of string, or FALSE if needle is not found.
2135
   */
2136
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
2142
2143
  /**
2144 2
   * Reads entire file into a string.
2145 2
   *
2146
   * WARNING: do not use the UTF-8 option for binary files (e.g. images)!
2147 2
   *
2148 2
   * @link http://php.net/manual/en/function.file-get-contents.php
2149
   *
2150
   * @param string   $filename      <p>
2151
   *                                Name of the file to read.
2152 2
   *                                </p>
2153 2
   * @param int      $flags         [optional] <p>
2154
   *                                Prior to PHP 6, this parameter is called
2155 2
   *                                use_include_path and is a bool.
2156 2
   *                                As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
2157
   *                                to trigger include path
2158 2
   *                                search.
2159 1
   *                                </p>
2160 1
   *                                <p>
2161 2
   *                                The value of flags can be any combination of
2162
   *                                the following flags (with some restrictions), joined with the
2163
   *                                binary OR (|)
2164
   *                                operator.
2165 2
   *                                </p>
2166
   *                                <p>
2167
   *                                <table>
2168
   *                                Available flags
2169 2
   *                                <tr valign="top">
2170 2
   *                                <td>Flag</td>
2171
   *                                <td>Description</td>
2172 2
   *                                </tr>
2173 2
   *                                <tr valign="top">
2174 1
   *                                <td>
2175 1
   *                                FILE_USE_INCLUDE_PATH
2176
   *                                </td>
2177 2
   *                                <td>
2178 2
   *                                Search for filename in the include directory.
2179
   *                                See include_path for more
2180
   *                                information.
2181 2
   *                                </td>
2182
   *                                </tr>
2183
   *                                <tr valign="top">
2184
   *                                <td>
2185
   *                                FILE_TEXT
2186
   *                                </td>
2187
   *                                <td>
2188
   *                                As of PHP 6, the default encoding of the read
2189
   *                                data is UTF-8. You can specify a different encoding by creating a
2190
   *                                custom context or by changing the default using
2191 2
   *                                stream_default_encoding. This flag cannot be
2192
   *                                used with FILE_BINARY.
2193
   *                                </td>
2194 2
   *                                </tr>
2195
   *                                <tr valign="top">
2196
   *                                <td>
2197
   *                                FILE_BINARY
2198 2
   *                                </td>
2199
   *                                <td>
2200
   *                                With this flag, the file is read in binary mode. This is the default
2201
   *                                setting and cannot be used with FILE_TEXT.
2202
   *                                </td>
2203
   *                                </tr>
2204 2
   *                                </table>
2205
   *                                </p>
2206
   * @param resource $context       [optional] <p>
2207
   *                                A valid context resource created with
2208
   *                                stream_context_create. If you don't need to use a
2209
   *                                custom context, you can skip this parameter by &null;.
2210 2
   *                                </p>
2211
   * @param int      $offset        [optional] <p>
2212
   *                                The offset where the reading starts.
2213
   *                                </p>
2214
   * @param int      $maxlen        [optional] <p>
2215
   *                                Maximum length of data read. The default is to read until end
2216 2
   *                                of file is reached.
2217
   *                                </p>
2218
   * @param int      $timeout
2219
   *
2220
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non
2221
   *                                default utf-8 chars
2222 2
   *
2223
   * @return string The function returns the read data or false on failure.
2224
   */
2225
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and
    // rewrites quotes / tag-like parts of the name - revisit before upgrading.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // apply the timeout through an "http" stream context, unless the caller
    // supplied a context of its own
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // $maxlen is only forwarded when it really is an integer
    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // str_detect_encoding() is documented to return false when it cannot
      // detect anything; such a value must not reach mb_convert_encoding()
      // as source encoding, so the data is then only passed through cleanup().
      $encoding = self::str_detect_encoding($data);
      if (is_string($encoding) && $encoding !== '' && $encoding !== 'UTF-8') {
        $data = mb_convert_encoding($data, 'UTF-8', $encoding);
      }

      // clean utf-8 string
      $data = self::cleanup($data);
    }

    return $data;
  }
2267 3
2268
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * Detection happens in three steps:
   * 1. a byte-order-mark at the start of the string,
   * 2. "mb_detect_encoding()" in strict mode with a fixed candidate list,
   * 3. for input that "is_binary()" reports as binary: the "is_utf16()" / "is_utf32()" checks,
   *    whose result takes precedence over step 2.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    // BOM table: the 4-byte UTF-32 marks must be tested before the 2-byte
    // UTF-16 marks, because the UTF-32LE BOM (FF FE 00 00) starts with the
    // UTF-16LE BOM (FF FE).
    $bomToEncoding = array(
        "\x00\x00\xfe\xff" => 'UTF-32BE',
        "\xff\xfe\x00\x00" => 'UTF-32LE',
        "\xef\xbb\xbf"     => 'UTF-8',
        "\xfe\xff"         => 'UTF-16BE',
        "\xff\xfe"         => 'UTF-16LE',
    );

    foreach ($bomToEncoding as $bom => $bomEncoding) {
      if (substr($str, 0, strlen($bom)) === $bom) {
        return $bomEncoding;
      }
    }

    self::checkForSupport();

    // For UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always.
    $detectOrder = array(
        'UTF-8',
        'windows-1251',
        'ISO-8859-1',
    );
    $encoding = mb_detect_encoding($str, $detectOrder, true);

    if (self::is_binary($str)) {
      // Each check is evaluated once; both return 1 (LE), 2 (BE) or false.
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    return $encoding ? $encoding : false;
  }
2346
2347 2
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * The input is reported as binary when it consists only of "0" / "1" characters,
   * when it contains at least one NUL byte, or when the third (ratio) check below passes.
   *
   * @param string $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    // A string made up solely of the characters "0" and "1".
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte.
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    $byteLength = strlen($input);
    if (!$byteLength) {
      return false;
    }

    // NOTE(review): substr_count() looks for the literal substring '^ -~' here,
    // it is not a character class — confirm whether a count of non-printable
    // bytes was intended.
    return substr_count($input, '^ -~') / $byteLength > 0.3;
  }
2371 1
2372 1
  /**
   * Check if the string is UTF-16.
   *
   * Only input that "is_binary()" accepts is examined. For each byte order the string is
   * converted to UTF-8, back, and to UTF-8 again; when that round trip is stable a score
   * is computed and the byte order with the higher score wins.
   *
   * @param string $string
   *
   * @return int|false false if it is not UTF-16 (or the scores are equal), 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($string)
  {
    if (!self::is_binary($string)) {
      return false;
    }

    self::checkForSupport();

    $score = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $score[$candidate] = 0;

      $decoded = mb_convert_encoding($string, 'UTF-8', $candidate);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      $reEncoded = mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip != $decoded) {
        continue;
      }

      $stringChars = self::count_chars($string);
      foreach (array_keys(self::count_chars($roundTrip)) as $decodedChar) {
        // NOTE(review): in_array() searches the *values* of $stringChars (the
        // counts), not its keys (the characters) — confirm whether a key
        // lookup was intended.
        if (in_array($decodedChar, $stringChars, true) === true) {
          $score[$candidate]++;
        }
      }
    }

    if ($score['UTF-16LE'] > $score['UTF-16BE']) {
      return 1;
    }

    if ($score['UTF-16LE'] < $score['UTF-16BE']) {
      return 2;
    }

    return false;
  }
2426
2427
  /**
   * Returns count of characters used in a string.
   *
   * The string is broken up via "UTF8::split()" (presumably into single characters —
   * confirm against that method), the pieces are counted and the result is sorted by key.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array with the pieces as keys and
   *           their number of occurrences as values.
   */
  public static function count_chars($str) // there is no $mode parameters
  {
    $occurrences = array_count_values(self::split($str));

    ksort($occurrences);

    return $occurrences;
  }
2443
2444
  /**
   * Check if the string is UTF-32.
   *
   * Only input that "is_binary()" accepts is examined. For each byte order the string is
   * converted to UTF-8, back, and to UTF-8 again; when that round trip is stable a score
   * is computed and the byte order with the higher score wins.
   *
   * @param string $string
   *
   * @return int|false false if it is not UTF-32 (or the scores are equal), 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($string)
  {
    if (!self::is_binary($string)) {
      return false;
    }

    self::checkForSupport();

    $score = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $score[$candidate] = 0;

      $decoded = mb_convert_encoding($string, 'UTF-8', $candidate);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      $reEncoded = mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip != $decoded) {
        continue;
      }

      $stringChars = self::count_chars($string);
      foreach (array_keys(self::count_chars($roundTrip)) as $decodedChar) {
        // NOTE(review): in_array() searches the *values* of $stringChars (the
        // counts), not its keys (the characters) — confirm whether a key
        // lookup was intended.
        if (in_array($decodedChar, $stringChars, true) === true) {
          $score[$candidate]++;
        }
      }
    }

    if ($score['UTF-32LE'] > $score['UTF-32BE']) {
      return 1;
    }

    if ($score['UTF-32LE'] < $score['UTF-32BE']) {
      return 2;
    }

    return false;
  }
2498
2499
  /**
   * Clean-up a string and keep only printable UTF-8 chars at the end.
   *
   * The input is cast to string, passed through "fix_simple_utf8()" and then through "clean()".
   *
   * @param string|false $string
   *
   * @return string Empty string for empty input.
   */
  public static function cleanup($string)
  {
    $string = (string)$string;

    if ($string === '') {
      return '';
    }

    // init
    self::checkForSupport();

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($string);

    // According to the original notes this call removes non UTF-8 symbols,
    // the replacement character, invisible characters (e.g. "\0") and the BOM,
    // and normalizes whitespace while keeping non-breaking spaces.
    return (string)self::clean($repaired, true, true, false, true);
  }
2529
2530
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and hands them to "is_binary()".
   * When the file cannot be opened or read, an empty block is checked instead.
   *
   * @param string $file
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing, so the
      // handle has to be checked before it is used.
      if ($fp !== false) {
        $read = fread($fp, 512);
        fclose($fp);

        if ($read !== false) {
          $block = $read;
        }
      }
    } catch (\Exception $e) {
      // Reached only when something turns the warning into an exception
      // (e.g. a custom error handler); treat it like an unreadable file.
      $block = '';
    }

    return self::is_binary($block);
  }
2549
2550
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around "mb_strrchr()" with "UTF-8" as default encoding.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] If true, return all of haystack from the beginning
   *                         to the last occurrence of needle; if false, return all of
   *                         haystack from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name to use.
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
2584 6
2585 4
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around "mb_strrichr()" with "UTF-8" as default encoding.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] If true, return all of haystack from the beginning
   *                         to the last occurrence of needle; if false, return all of
   *                         haystack from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name to use.
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
2619 11
2620
  /**
   * "filter_var()"-wrapper: runs the native function and passes its result through "UTF8::filter()".
   *
   * $option is only forwarded when the caller actually supplied a third argument.
   *
   * @param mixed $var
   * @param int   $filter
   * @param mixed $option
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    $filtered = func_num_args() < 3
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($filtered);
  }
2639 11
2640
  /**
   * Normalizes to UTF-8 NFC, converting from CP-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized, every other type is returned untouched.
   *
   * @param mixed  $var
   * @param int    $normalization_form Form passed to the Normalizer calls.
   * @param string $leading_combining  Prefix added when the result starts with a combining mark.
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    if (is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (is_object($var)) {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (!is_string($var)) {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure 7-bit strings need no further work.
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty placeholder so that the isset() check below passes.
      $normalized = '-';
    } else {
      $normalized = Normalizer::normalize($var, $normalization_form);

      // An empty / failed normalization falls back to re-encoding as UTF-8.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
2692
2693
  /**
   * Encode to UTF8 or LATIN1.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so it can be called on a string that is already UTF-8 without messing it up.
   *
   * @param string $encodingLabel ISO-8859-1 || UTF-8 (normalized via "normalizeEncoding()")
   * @param string $string
   *
   * @return false|string Will return false on error.
   */
  public static function encode($encodingLabel, $string)
  {
    switch (self::normalizeEncoding($encodingLabel)) {
      case 'UTF-8':
        return self::to_utf8($string);

      case 'ISO-8859-1':
        return self::to_latin1($string);

      default:
        return false;
    }
  }
2718 1
2719 1
  /**
   * Normalize the encoding-name input.
   *
   * The label is upper-cased and stripped of everything except letters, digits and whitespace,
   * then looked up in a fixed alias table. Unknown labels fall back to "UTF-8".
   *
   * @param string $encodingLabel e.g.: ISO, UTF8, ISO88591, WIN1252, etc.
   *
   * @return string "UTF-8" or "ISO-8859-1"
   */
  protected static function normalizeEncoding($encodingLabel)
  {
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
    );

    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));

    return isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : 'UTF-8';
  }
2750
2751
  /**
   * alias for "UTF8::to_win1252()"
   *
   * @param string|array $string
   *
   * @return string|array Whatever "to_win1252()" returns for the given input.
   */
  public static function to_latin1($string)
  {
    $converted = self::to_win1252($string);

    return $converted;
  }
2762 5
2763
  /**
   * Convert a string into win1252.
   *
   * Strings go through "utf8_decode()", arrays are converted element by element
   * (recursively), any other value is returned unchanged.
   *
   * @param  string|array $string
   *
   * @return string|array
   */
  protected static function to_win1252($string)
  {
    if (is_string($string)) {
      return self::utf8_decode($string);
    }

    if (!is_array($string)) {
      return $string;
    }

    foreach (array_keys($string) as $key) {
      $string[$key] = self::to_win1252($string[$key]);
    }

    return $string;
  }
2786
2787
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through "to_utf8()", then the keys of the
   * "$utf8ToWin1252" table are replaced by their values, and the result is
   * handed to "Xml::utf8_decode()".
   *
   * @param string $string
   *
   * @return string Empty string for empty input.
   */
  public static function utf8_decode($string)
  {
    // Search / replace lists are built once and cached for later calls.
    static $search = null;
    static $replace = null;

    $string = (string)$string;

    if ($string === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($string);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $utf8);

    return Xml::utf8_decode($mapped);
  }
2817
2818
  /**
   * "filter_input()"-wrapper: runs the native function and passes its result through "UTF8::filter()".
   *
   * $option is only forwarded when the caller actually supplied a fourth argument.
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param mixed  $option
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $filtered = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($filtered);
  }
2838
2839
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "utf8_encode()", the keys of the "$cp1252ToUtf8" table are
   * replaced by their values — but only when the encoded string contains a "\xC2" byte.
   *
   * @param string $string
   *
   * @return string
   */
  public static function utf8_encode($string)
  {
    // Search / replace lists are built once and cached for later calls.
    static $search = null;
    static $replace = null;

    $encoded = utf8_encode($string);

    if (false === strpos($encoded, "\xC2")) {
      return $encoded;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
2865
2866
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value.
   *
   * The value is passed through "UTF8::filter()" before it is encoded.
   * $depth is only forwarded when "Bootup::is_php('5.5')" reports support for it.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   The value being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   * @param int   $options [optional] Bitmask of the JSON_* encode constants
   *                       (e.g. <b>JSON_HEX_TAG</b>, <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>), see the JSON constants page.
   * @param int   $depth   [optional] Set the maximum depth. Must be greater than zero.
   *
   * @return string|false a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $value = self::filter($value);

    if (!Bootup::is_php('5.5')) {
      return json_encode($value, $options);
    }

    return json_encode($value, $options, $depth);
  }
2914
2915
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Decodes a JSON string.
   *
   * The input is passed through "UTF8::filter()" before it is decoded.
   * $options is only forwarded when "Bootup::is_php('5.4')" returns true.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    The <i>json</i> string being decoded (UTF-8 encoded).
   * @param bool   $assoc   [optional] When <b>TRUE</b>, returned objects will be converted
   *                        into associative arrays.
   * @param int    $depth   [optional] User specified recursion depth.
   * @param int    $options [optional] Bitmask of JSON decode options, e.g.
   *                        <b>JSON_BIGINT_AS_STRING</b>.
   *
   * @return mixed the value encoded in <i>json</i> in appropriate PHP type.
   *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the
   *               encoded data is deeper than the recursion limit.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $json = self::filter($json);

    if (Bootup::is_php('5.4') !== true) {
      return json_decode($json, $assoc, $depth);
    }

    return json_decode($json, $assoc, $depth, $options);
  }
2963 1
2964 1
  /**
   * "filter_var_array()"-wrapper: runs the native function and passes its result through "UTF8::filter()".
   *
   * $definition and $add_empty are only forwarded when the caller supplied at least two arguments.
   *
   * @param array $data
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    $filtered = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($filtered);
  }
2983
2984 2
  /**
   * "filter_input_array()"-wrapper: runs the native function and passes its result through "UTF8::filter()".
   *
   * $definition and $add_empty are only forwarded when the caller supplied at least two arguments.
   *
   * @param int   $type
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    $filtered = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($filtered);
  }
3003
3004 2
  /**
   * Search a string for any of a set of characters.
   *
   * The character list is turned into a regex class via "rxClass()"; on a match the
   * string is returned from the first occurrence of the matched text onwards.
   *
   * @param string $s
   * @param string $charList
   *
   * @return string|false The rest of the string starting at the match, or false if nothing matched.
   */
  public static function strpbrk($s, $charList)
  {
    $pattern = '/' . self::rxClass($charList) . '/us';

    if (!preg_match($pattern, $s, $match)) {
      return false;
    }

    return substr($s, strpos($s, $match[0]));
  }
3020
3021 14
  /**
   * Case-insensitive comparison of the first n characters of two strings.
   *
   * Both strings are case-folded via self::strtocasefold() and then compared
   * with self::strncmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len  Number of characters to compare.
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
3034
3035
  /**
   * Comparison of the first n characters of two strings.
   *
   * Both strings are cut to $len characters with self::substr() and the
   * prefixes are compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len  Number of characters to compare.
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    $prefix1 = self::substr($str1, 0, $len);
    $prefix2 = self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
3050 1
3051 1
  /**
   * String comparison.
   *
   * Identical strings short-circuit to 0; otherwise both strings are
   * normalized to NFD and compared byte-wise with the native strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    // Fast path: no normalization needed when both are already identical.
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $decomposed1 = Normalizer::normalize($str1, Normalizer::NFD);
    $decomposed2 = Normalizer::normalize($str2, Normalizer::NFD);

    return strcmp($decomposed1, $decomposed2);
  }
3068 1
3069
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest per-character byte length, or 0 when
   *               chr_size_list() yields no characters.
   */
  public static function max_chr_width($str)
  {
    $widths = self::chr_size_list($str);

    if (count($widths) <= 0) {
      return 0;
    }

    return (int)max($widths);
  }
3086
3087
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character;
   *                 an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Test for emptiness by length, not truthiness: the string "0" is falsy
    // in PHP but is a perfectly valid one-character input.
    if (!isset($str[0])) {
      return array();
    }

    // strlen() is byte-based, so it yields the encoded width of each character.
    return array_map('strlen', self::split($str));
  }
3107
3108
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   *
   * @return   string The HTML numbered entity, or an empty string for empty input.
   */
  public static function single_chr_html_encode($chr)
  {
    $chr = (string)$chr;

    // Test for emptiness by length, not truthiness: "0" is falsy in PHP but
    // must still be encoded (as "&#48;").
    if (!isset($chr[0])) {
      return '';
    }

    return '&#' . self::ord($chr) . ';';
  }

  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * Only the first (up to) four bytes of the input are inspected. The lead
   * byte selects the sequence length; continuation bytes are not validated.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for an empty string.
   */
  public static function ord($s)
  {
    $s = (string)$s;

    // Test for emptiness by length, not truthiness: "0" is falsy in PHP but
    // its code point is 48, not 0.
    if (!isset($s[0])) {
      return 0;
    }

    // unpack('C*') returns the bytes as a 1-based array of integers
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // 4-byte sequence
    if (0xF0 <= $lead && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // 3-byte sequence
    if (0xE0 <= $lead && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // 2-byte sequence
    if (0xC0 <= $lead && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte (or a lead byte without enough following bytes)
    return $lead;
  }
3155
3156
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * e.g.: &#123;&#39;&#1740;
   *
   * @param  string $str The Unicode string to be encoded as numbered entities.
   *
   * @return string HTML numbered entities, one per character returned by split().
   */
  public static function html_encode($str)
  {
    $characters = self::split($str);
    $entities = array_map(array(__CLASS__, 'single_chr_html_encode'), $characters);

    return implode('', $entities);
  }
3177 4
3178 4
  /**
   * Checks if a file starts with BOM character.
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (including when the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    // Read only the first three bytes. The offset must be 0: as of PHP 7.1 a
    // negative offset counts from the end of the stream, so the former "-1"
    // read the last byte of the file instead of its beginning.
    $head = file_get_contents($file_path, false, null, 0, 3);

    // is_bom() compares strictly, so a failed read (false) yields false.
    return self::is_bom($head);
  }
3189
3190
  /**
   * Checks if the given string is exactly "UTF8 - Byte Order Mark".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param    string $utf8_chr The input string.
   *
   * @return   bool True if $utf8_chr is identical to the Byte Order Mark, False otherwise.
   */
  public static function is_bom($utf8_chr)
  {
    $byteOrderMark = self::bom();

    return $byteOrderMark === $utf8_chr;
  }
3203 4
3204
  /**
   * Returns the Byte Order Mark Character.
   *
   * @return   string The three bytes EF BB BF.
   */
  public static function bom()
  {
    $byteOrderMark = "\xEF\xBB\xBF";

    return $byteOrderMark;
  }
3213
3214
  /**
   * Alias for "UTF8::is_bom()".
   *
   * @param string $utf8_chr The input string.
   *
   * @return boolean Whatever is_bom() returns for $utf8_chr.
   */
  public static function isBom($utf8_chr)
  {
    $isByteOrderMark = self::is_bom($utf8_chr);

    return $isByteOrderMark;
  }
3225
3226
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    // the BOM is three bytes long, so only the first three bytes matter
    $leadingBytes = substr($str, 0, 3);

    return self::is_bom($leadingBytes);
  }
3237
3238
  /**
   * Prepends BOM character to the string and returns the whole string.
   *
   * INFO: If the string already starts with a BOM, it is returned unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that contains BOM
   */
  public static function add_bom_to_string($str)
  {
    $leadingBytes = substr($str, 0, 3);

    if (self::is_bom($leadingBytes)) {
      return $str;
    }

    return self::bom() . $str;
  }
3255
3256
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split into characters with split(), the resulting list is
   * shuffled with the native shuffle() and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $characters = self::split($str);

    shuffle($characters);

    return implode('', $characters);
  }
3271
3272
  /**
   * Wraps a string to a given number of characters.
   *
   * The work is delegated to the native wordwrap(): the input is mirrored
   * into a single-byte "mask" string of the same character length, the mask
   * is wrapped natively, and the positions of the inserted breaks are then
   * applied to the real characters.
   *
   * @param string $string The input string.
   * @param int    $width  The column width, in characters.
   * @param string $break  The line break to insert.
   * @param bool   $cut    Passed through to the native wordwrap().
   *
   * @return false|string Returns the given string wrapped at the specified length,
   *                      or false when the native wordwrap() rejects the arguments by returning false.
   */
  public static function wordwrap($string, $width = 75, $break = "\n", $cut = false)
  {
    // let the native function validate $width / $break / $cut
    if (false === wordwrap('-', $width, $break, $cut)) {
      return false;
    }

    // Normalize a non-string $break once, so that the strict comparisons
    // against the collected characters below are string-to-string.
    // (The condition used to be inverted, which made the cast a no-op.)
    if (!is_string($break)) {
      $break = (string)$break;
    }

    $w = '';
    $string = explode($break, $string);
    $iLen = count($string);
    $chars = array();

    if (1 === $iLen && '' === $string[0]) {
      return '';
    }

    // Build $chars (the real characters, with $break between the original
    // lines) and $w (the mask: '#' for a break, ' ' for a space, '?' for
    // every other character).
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $iLen; ++$i) {

      if ($i) {
        $chars[] = $break;
        $w .= '#';
      }

      $c = $string[$i];
      unset($string[$i]);

      foreach (self::split($c) as $c) {
        $chars[] = $c;
        $w .= ' ' === $c ? ' ' : '?';
      }
    }

    $string = '';
    $j = 0;
    $b = $i = -1;
    // wrap the mask, using '#' as the break marker
    $w = wordwrap($w, $width, '#', $cut);

    // walk over every '#' in the wrapped mask
    while (false !== $b = self::strpos($w, '#', $b + 1)) {
      // copy the characters in front of this break
      for (++$i; $i < $b; ++$i) {
        $string .= $chars[$j];
        unset($chars[$j++]);
      }

      // the character at the break position is dropped when it is an
      // original break or a space (it is replaced by $break)
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
        unset($chars[$j++]);
      }

      $string .= $break;
    }

    // append whatever follows the last break
    return $string . implode('', $chars);
  }
3338
3339
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to search for. A non-negative integer is
   *                              converted to its character via UTF8::chr().
   *                              </p>
   * @param int        $offset    [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string     $encoding  Encoding handed to mb_strpos(); true/false fall back to 'UTF-8'.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support integer $needle, so a non-negative
    // integer is converted to its character. This has to happen *before* the
    // string cast below: once $needle is a string the integer test can never
    // succeed.
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean((string)$needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): the guard checks for iconv support but the call is
    // grapheme_strpos() (intl) - confirm which of the two is intended.
    if (self::$support['iconv'] === true) {
      return grapheme_strpos($haystack, $needle, $offset);
    }

    // pure PHP fallback: cut off the offset, search byte-wise and convert
    // the byte position back into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
3413 1
3414
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * An integer is used as-is. Any other value is parsed with hex_to_int();
   * when that yields 0 an empty string is returned.
   *
   * @param    int|string $code_point The code point for which to generate a character.
   *
   * @return   string Multi-Byte character, returns empty string on failure to encode.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $codePointAsInt = (int)$code_point;

    if ($codePointAsInt !== $code_point) {
      // $code_point is not an integer, lets extract the int code point from it
      $codePointAsInt = (int)self::hex_to_int($code_point);

      if (0 === $codePointAsInt) {
        return '';
      }
    }

    // let the entity decoder build the character from a numeric entity
    return self::html_entity_decode('&#' . $codePointAsInt . ';', ENT_QUOTES);
  }
3434
3435
  /**
   * Converts hexadecimal U+xxxx code point representation to Integer.
   *
   * Accepted forms (case-insensitive): four to six hexadecimal digits,
   * optionally prefixed with "\u" or "U+".
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits are allowed: the former class [a-z0-9] also
    // accepted letters such as "g" or "z", and intval() then silently
    // converted just the leading hex part instead of reporting a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
3452
3453
  /**
   * Reverses characters order in the string.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $characters = self::split($str);
    $reversed = array_reverse($characters);

    return implode($reversed);
  }
3464
3465 1
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *                      (an array is concatenated first).
   *
   * @return   string The character with the highest code point.
   */
  public static function max($arg)
  {
    $text = is_array($arg) ? implode($arg) : $arg;

    $highestCodePoint = max(self::codepoints($text));

    return self::chr($highestCodePoint);
  }
3480
3481
  /**
   * Accepts a string and returns an array of Unicode code points.
   *
   * @param    mixed $arg     A UTF-8 encoded string (split into characters first)
   *                          or an array of characters.
   * @param    bool  $u_style If True, will return code points in U+xxxx format,
   *                          default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $characters = is_string($arg) ? self::split($arg) : $arg;

    // character => integer code point
    $codePoints = array_map(array(__CLASS__, 'ord'), $characters);

    if (!$u_style) {
      return $codePoints;
    }

    // integer code point => "U+xxxx"
    return array_map(array(__CLASS__, 'int_to_hex'), $codePoints);
  }
3516
3517
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *                      (an array is concatenated first).
   *
   * @return   string The character with the lowest code point.
   */
  public static function min($arg)
  {
    $text = is_array($arg) ? implode($arg) : $arg;

    $lowestCodePoint = min(self::codepoints($text));

    return self::chr($lowestCodePoint);
  }
3532 26
3533 5
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character
   * @param    string $pfix Prefix put in front of the hex digits.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $codePoint = self::ord($chr);

    return self::int_to_hex($codePoint, $pfix);
  }
3545
3546
  /**
   * Converts Integer to hexadecimal U+xxxx code point representation.
   *
   * The hex digits are lower-case and left-padded with zeros to at least
   * four digits.
   *
   * @param    int    $int  The integer to be converted to hexadecimal code point.
   * @param    string $pfix Prefix put in front of the hex digits.
   *
   * @return   string The code point, or empty string when $int does not consist of digits only.
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    if (!ctype_digit((string)$int)) {
      return '';
    }

    $hexDigits = dechex((int)$int);

    if (strlen($hexDigits) < 4) {
      // left-pad to four digits
      $hexDigits = substr('0000' . $hexDigits, -4);
    }

    return $pfix . $hexDigits;
  }
3566
3567
  /**
   * Get a binary representation of a string.
   *
   * The string is processed byte by byte; every byte is rendered as eight
   * binary digits.
   *
   * @param   string $string The input character.
   *
   * @return  string The concatenated 8-digit groups, or an empty string for empty input.
   */
  public static function str_to_binary($string)
  {
    $string = (string)$string;

    if ('' === $string) {
      return '';
    }

    $bits = '';
    $byteCount = strlen($string);
    $index = 0;

    while ($index < $byteCount) {
      $bits .= vsprintf('%08b', array(self::ord($string[$index])));
      ++$index;
    }

    return $bits;
  }
3593
3594
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split with a word pattern while keeping the words as
   * delimiters, so that words sit at the odd indexes of the split result.
   *
   * @param string $s        The input string.
   * @param int    $format   1: return the list of words;<br />
   *                         2: return the words keyed by their character offset;<br />
   *                         anything else: return the number of words.
   * @param string $charlist Additional characters handed to rxClass() together with "\pL".
   *
   * @return array|int|float The number of words, or the words themselves (see $format).
   */
  public static function str_word_count($s, $format = 0, $charlist = '')
  {
    $wordClass = self::rxClass($charlist, '\pL');
    $pieces = preg_split("/({$wordClass}+(?:[\p{Pd}’']{$wordClass}+)*)/u", $s, -1, PREG_SPLIT_DELIM_CAPTURE);
    $pieceCount = count($pieces);

    if (1 == $format) {
      // plain list of words
      $words = array();
      for ($i = 1; $i < $pieceCount; $i += 2) {
        $words[] = $pieces[$i];
      }

      return $words;
    }

    if (2 == $format) {
      self::checkForSupport();

      // words keyed by the character offset at which they start
      $words = array();
      $offset = self::strlen($pieces[0]);
      for ($i = 1; $i < $pieceCount; $i += 2) {
        $words[$offset] = $pieces[$i];
        $offset += self::strlen($pieces[$i]) + self::strlen($pieces[$i + 1]);
      }

      return $words;
    }

    // pieces alternate "non-word, word, non-word, ..."
    return ($pieceCount - 1) / 2;
  }
3628 1
3629
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * But we can only use the original function if the string / chars are
   * limited to 7-bit characters, and checking for ASCII (7-bit) costs more
   * time than we could save here.
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped; when omitted (or empty)
   *                          separator (\pZ) and "other" (\pC) characters are stripped.
   *
   * @return   string The trimmed string
   */
  public static function trim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    // "0" / 0 is a valid character list (strip zeros) although it is falsy
    // in PHP, so it must not be mistaken for "no character list given".
    $useDefaultChars = ($chars === INF) || (!$chars && $chars !== '0' && $chars !== 0);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $string);
    }

    return self::rtrim(self::ltrim($string, $chars), $chars);
  }
3657 4
3658
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * WARNING: This is much slower than "rtrim()" !!!!
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped; when omitted "\s" is used.
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if ('' === $string) {
      return '';
    }

    if (INF === $chars) {
      $charClass = '\s';
    } else {
      $charClass = self::rxClass($chars);
    }

    return preg_replace("/{$charClass}+$/u", '', $string);
  }
3680
3681
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than "ltrim()" !!!!
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped; when omitted "\s" is used.
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if ('' === $string) {
      return '';
    }

    if (INF === $chars) {
      $charClass = '\s';
    } else {
      $charClass = self::rxClass($chars);
    }

    return preg_replace("/^{$charClass}+/u", '', $string);
  }
3703
3704
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * For array input every element is processed on its own (recursively),
   * with $replacement / $start / $length either taken element-wise from
   * arrays or repeated for every element.
   *
   * @param string|array   $string      The input string, or an array of input strings.
   * @param string|array   $replacement The replacement; for a string input only the first
   *                                    element of an array is used.
   * @param int|array      $start       Character position at which the replacement starts.
   * @param null|int|array $length      Number of characters to replace; null replaces up to the
   *                                    end of a string input.
   *
   * @return array|string The result string, or an array of result strings for array input.
   */
  public static function substr_replace($string, $replacement, $start, $length = null)
  {

    if (is_array($string)) {
      $num = count($string);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          // non-integer start values are replaced by 0
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        // break the reference (the wrong variable used to be unset here,
        // which left $valueTmp bound to the last array element)
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      // NOTE(review): for array input a missing $length means "insert"
      // (length 0), for string input it means "replace to the end" -
      // confirm that this asymmetry is intended.
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $string, $replacement, $start, $length);
    } else {
      if (is_array($replacement)) {
        if (count($replacement) > 0) {
          $replacement = $replacement[0];
        } else {
          $replacement = '';
        }
      }
    }

    // split both strings into lists of UTF-8 characters
    preg_match_all('/./us', (string)$string, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // Replace everything up to the end. The character list is already at
      // hand, so count it instead of relying on mb_strlen() and its
      // internal-encoding setting.
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // glue first, array second: the legacy (array, glue) argument order is
    // no longer accepted by PHP 8
    return implode('', $smatches[0]);
  }
3782
3783
  /**
   * Alias for "UTF8::to_latin1()".
   *
   * @param string $string The input string.
   *
   * @return string Whatever to_latin1() returns for $string.
   */
  public static function toLatin1($string)
  {
    $converted = self::to_latin1($string);

    return $converted;
  }
3794 1
3795
  /**
   * Count the number of sub-string occurrences.
   *
   * $offset and $length are applied once, in characters, by slicing the
   * haystack with UTF8::substr(); the occurrences are then counted in the
   * remaining part.
   *
   * @param    string   $haystack The string to search in.
   * @param    string   $needle   The string to search for.
   * @param    int      $offset   The offset (in characters) where to start counting.
   * @param    int|null $length   The maximum length (in characters) after the specified offset
   *                              to search for the substring; null searches up to the end.
   *
   * @return   int number of occurrences of $needle
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $offset = (int)$offset;

    if ($length !== null) {
      $haystack = self::substr($haystack, $offset, (int)$length);
    } elseif ($offset !== 0) {
      // no length given: keep everything from $offset up to the end
      // (casting null to int would have sliced with a length of 0)
      $haystack = self::substr($haystack, $offset);
    }

    // Offset / length are already applied above, so they must not be handed
    // to the byte-based native function a second time.
    return substr_count((string)$haystack, $needle);
  }
3821
3822 1
  /**
   * Alias of "UTF8::is_ascii()".
   *
   * @param string $str The string that is handed over to UTF8::is_ascii().
   *
   * @return boolean The result of UTF8::is_ascii() for the given string.
   */
  public static function isAscii($str)
  {
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
3833
3834
  /**
   * Checks if a string is 7 bit ASCII.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if no byte in the range 0x80-0xFF was found<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    // a single byte with the high bit set is enough to rule out 7 bit ASCII
    $highByteFound = preg_match('/[\x80-\xFF]/', $str);

    return !$highByteFound;
  }
3846
3847
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters between both boundaries (built via UTF8::chr()),
   *                 or an empty array if a boundary is empty or resolves to an empty code point.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $start = self::range_boundary_to_code_point($var1);
    if (!$start) {
      return array();
    }

    $end = self::range_boundary_to_code_point($var2);
    if (!$end) {
      return array();
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Resolve one boundary of UTF8::range() to a code point.
   *
   * Decimal digits are taken as the code point itself, hexadecimal digits are converted
   * with UTF8::hex_to_int(), everything else is treated as a character and passed to UTF8::ord().
   *
   * @param    mixed $var The boundary as given to UTF8::range().
   *
   * @return   int|mixed The code point (or whatever UTF8::ord() returns for a character).
   */
  private static function range_boundary_to_code_point($var)
  {
    // the ctype functions only behave predictably for strings, so cast once for both checks
    $asString = (string)$var;

    if (ctype_digit($asString)) {
      return (int)$var;
    }

    if (ctype_xdigit($asString)) {
      return (int)self::hex_to_int($var);
    }

    return self::ord($var);
  }
3893
3894 1
  /**
   * Creates a random string from a pool of letters and digits.
   *
   * With PCRE UTF-8 support the pool is built from the code points 48-79, reduced to the
   * characters matching \p{N}, \p{Lu} or \p{Ll}; otherwise it is "0-9", "A-Z" and "a-z".
   * The pool is built once and cached in static variables.
   *
   * INFO: mt_rand() is used, so the result is not suitable for security related purposes.
   *
   * @param    int $len The length of string in characters.
   *
   * @return   string String consisting of random characters, or an empty string if $len is not positive.
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // A fractional or non-numeric length would never count down to exactly 0,
    // so normalize it before it is used as a loop boundary.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (count($chars) === 0) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // Only drop the entries that were emptied by the replacement: array_filter()
        // without a callback would also throw away the character "0".
        $chars = array_values(array_filter($chars, 'strlen'));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for ($i = 0; $i < $len; ++$i) {
      $hash .= $chars[mt_rand(0, $chars_len - 1)];
    }

    return $hash;
  }
3941 2
3942
  /**
   * Alias of "UTF8::chr_map()".
   *
   * @param mixed $callback The callback that is handed over to UTF8::chr_map().
   * @param mixed $str      The string that is handed over to UTF8::chr_map().
   *
   * @return array The result of UTF8::chr_map().
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
3954 2
3955
  /**
   * Applies callback to all characters of a string.
   *
   * The string is split with UTF8::split() and the callback is run on every resulting element.
   *
   * @param    callable $callback The callback function, in any form accepted by array_map().
   * @param    string   $str      UTF-8 string to run callback on.
   *
   * @return   array The outcome of callback, one entry per element returned by UTF8::split().
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
3970 2
3971 2
  /**
   * Returns a single UTF-8 character from string.
   *
   * This offers "$str[1]"-like access by asking UTF8::substr() for one character at $pos.
   *
   * @param    string $string A UTF-8 string.
   * @param    int    $pos    The position of character to return.
   *
   * @return   string Single Multi-Byte character.
   */
  public static function access($string, $pos)
  {
    $char = self::substr($string, $pos, 1);

    return $char;
  }
3985
3986
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses repeated code points into a single entry
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4011
4012 1
  /**
   * Makes a UTF-8 string from code points.
   *
   * Every entry is converted with UTF8::chr() and the results are concatenated in array order.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $chars = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        $array
    );

    return implode('', $chars);
  }
4031 1
4032 1
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The input is passed through UTF8::clean() before it is handed to the native strip_tags().
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags which should not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are always stripped by strip_tags(),
   *                               this can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 first, then strip the tags
    return strip_tags(self::clean($str), $allowable_tags);
  }
4058
4059
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * Lengths are measured with UTF8::strlen() and the padding is cut with UTF8::substr().
   * The input is returned unchanged if $pad_length is not an integer, is not positive,
   * is smaller than the input length, or if $pad_string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $input_length) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad string can not fill anything and would cause a division by zero below
    if (!$ps_length) {
      return $input;
    }

    $diff = $pad_length - $input_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = self::substr(str_repeat($pad_string, (int)ceil($diff / $ps_length)), 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the same repeated padding is long enough for both sides
        $padding = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));

        // Divide first and cast afterwards, so that an odd difference results in an
        // integer length; the extra character goes to the right side.
        $pre = self::substr($padding, 0, (int)floor($diff / 2));
        $post = self::substr($padding, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = self::substr(str_repeat($pad_string, (int)ceil($diff / $ps_length)), 0, $diff);
        $pre = '';
    }

    return $pre . $input . $post;
  }
4104
4105 1
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is handed to the native str_repeat().
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    $filtered = self::filter($input);

    return str_repeat($filtered, $multiplier);
  }
4129
4130
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of a search string is collapsed into a single one.
   * If $what is neither a string nor an array, $str is returned unchanged.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $needles = array($what);
    } elseif (is_array($what)) {
      $needles = $what;
    } else {
      return $str;
    }

    foreach ($needles as $needle) {
      $pattern = '/(' . preg_quote($needle, '/') . ')+/';
      $str = preg_replace($pattern, $needle, $str);
    }

    return $str;
  }
4152 14
4153
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string      $haystack  <p>
   *                               The string from which to get the position of the first occurrence
   *                               of needle
   *                               </p>
   * @param string      $needle    <p>
   *                               The string to find in haystack
   *                               </p>
   * @param int|null    $offset    [optional] <p>
   *                               The position in haystack
   *                               to start searching; null is treated as 0
   *                               </p>
   * @param string|bool $encoding  The encoding handed over to mb_stripos(); a boolean is
   *                               replaced by "UTF-8" (fallback for old versions).
   * @param boolean     $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false Return the numeric position of the first occurrence of
   * needle in the haystack
   * string, or false if needle is not found or if haystack / needle is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    // mb_stripos() expects an integer offset, so the "null" default is normalized to 0 here
    return mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4200
4201
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively, keys are kept). A string is run
   * through UTF8::utf8_decode() and UTF8::to_utf8() again and again until a pass no longer
   * changes it.
   *
   * @param array|string $string
   *
   * @return array|string The fixed string, or the array of fixed elements for array input.
   */
  public static function fix_utf8($string)
  {
    if (is_array($string)) {
      return array_map(
          array(
              '\\voku\\helper\\UTF8',
              'fix_utf8',
          ),
          $string
      );
    }

    $previous = '';

    // loose comparison on purpose, it is the stop condition this method has always used
    while ($previous != $string) {
      $previous = $string;
      $decoded = self::utf8_decode($string);
      $string = self::to_utf8($decoded);
    }

    return $string;
  }
4228
4229
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @param mixed $str The value that is handed over unchanged to UTF8::ucfirst().
   *
   * @return string The result of UTF8::ucfirst() for the given input.
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
4240
4241
  /**
   * Makes string's first char uppercase.
   *
   * The first character is taken with UTF8::substr(), converted with UTF8::strtoupper()
   * and put in front of the untouched rest of the string.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $firstUpper = self::strtoupper(self::substr($str, 0, 1));
    $rest = self::substr($str, 1);

    return $firstUpper . $rest;
  }
4252
4253
  /**
   * Make a string uppercase.
   *
   * mb_strtoupper() is used if mbstring support was detected; otherwise the string is cleaned
   * with UTF8::clean() and converted via the lower => upper map from UTF8::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding The encoding handed over to mb_strtoupper().
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // lookup tables for the fallback, filled on first use
    static $caseTableKeys = null;
    static $caseTableValues = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return mb_strtoupper($str, $encoding);
    }

    // fallback

    if ($caseTableKeys === null) {
      $caseTable = self::case_table();
      $caseTableKeys = array_keys($caseTable);
      $caseTableValues = array_values($caseTable);
    }

    return str_replace($caseTableKeys, $caseTableValues, self::clean($str));
  }
4296
4297
  /**
4298
   * Returns an array of all lower and upper case UTF-8 encoded characters.
4299
   *
4300
   * @return   string An array with lower case chars as keys and upper chars as values.
4301
   */
4302
  protected static function case_table()
4303
  {
4304
    static $case = array(
4305
4306
      // lower => upper
4307
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
4308
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
4309
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
4310
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
4311
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
4312
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
4313
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
4314
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
4315
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
4316
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
4317
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
4318
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
4319
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
4320
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
4321
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
4322
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
4323
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
4324
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
4325
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
4326
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
4327
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
4328
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
4329
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
4330
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
4331
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
4332
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
4333
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
4334
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
4335
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
4336
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
4337
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
4338
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
4339
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
4340
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
4341
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
4342
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
4343
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
4344
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
4345
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
4346
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
4347
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
4348
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
4349
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
4350
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
4351
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
4352
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
4353
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
4354
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
4355
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
4356
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
4357
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
4358
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
4359
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
4360
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
4361
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
4362
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
4363
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
4364
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
4365
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
4366
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
4367
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
4368
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
4369
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
4370
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
4371
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
4372
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
4373
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
4374
      "\xea\x9e\x87"     => "\xea\x9e\x86",
4375
      "\xea\x9e\x85"     => "\xea\x9e\x84",
4376
      "\xea\x9e\x83"     => "\xea\x9e\x82",
4377
      "\xea\x9e\x81"     => "\xea\x9e\x80",
4378
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
4379
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
4380
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
4381
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
4382
      "\xea\x9d\xad"     => "\xea\x9d\xac",
4383
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
4384
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
4385
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
4386
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
4387
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
4388
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
4389
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
4390
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
4391
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
4392
      "\xea\x9d\x99"     => "\xea\x9d\x98",
4393
      "\xea\x9d\x97"     => "\xea\x9d\x96",
4394
      "\xea\x9d\x95"     => "\xea\x9d\x94",
4395
      "\xea\x9d\x93"     => "\xea\x9d\x92",
4396
      "\xea\x9d\x91"     => "\xea\x9d\x90",
4397
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
4398
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
4399
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
4400
      "\xea\x9d\x89"     => "\xea\x9d\x88",
4401
      "\xea\x9d\x87"     => "\xea\x9d\x86",
4402
      "\xea\x9d\x85"     => "\xea\x9d\x84",
4403
      "\xea\x9d\x83"     => "\xea\x9d\x82",
4404
      "\xea\x9d\x81"     => "\xea\x9d\x80",
4405
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
4406
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
4407
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
4408
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
4409
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
4410
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
4411
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
4412
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
4413
      "\xea\x9c\xad"     => "\xea\x9c\xac",
4414
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
4415
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
4416
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
4417
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
4418
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
4419
      "\xea\x9a\x97"     => "\xea\x9a\x96",
4420
      "\xea\x9a\x95"     => "\xea\x9a\x94",
4421
      "\xea\x9a\x93"     => "\xea\x9a\x92",
4422
      "\xea\x9a\x91"     => "\xea\x9a\x90",
4423
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
4424
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
4425
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
4426
      "\xea\x9a\x89"     => "\xea\x9a\x88",
4427
      "\xea\x9a\x87"     => "\xea\x9a\x86",
4428
      "\xea\x9a\x85"     => "\xea\x9a\x84",
4429
      "\xea\x9a\x83"     => "\xea\x9a\x82",
4430
      "\xea\x9a\x81"     => "\xea\x9a\x80",
4431
      "\xea\x99\xad"     => "\xea\x99\xac",
4432
      "\xea\x99\xab"     => "\xea\x99\xaa",
4433
      "\xea\x99\xa9"     => "\xea\x99\xa8",
4434
      "\xea\x99\xa7"     => "\xea\x99\xa6",
4435
      "\xea\x99\xa5"     => "\xea\x99\xa4",
4436
      "\xea\x99\xa3"     => "\xea\x99\xa2",
4437
      "\xea\x99\x9f"     => "\xea\x99\x9e",
4438
      "\xea\x99\x9d"     => "\xea\x99\x9c",
4439
      "\xea\x99\x9b"     => "\xea\x99\x9a",
4440
      "\xea\x99\x99"     => "\xea\x99\x98",
4441
      "\xea\x99\x97"     => "\xea\x99\x96",
4442
      "\xea\x99\x95"     => "\xea\x99\x94",
4443
      "\xea\x99\x93"     => "\xea\x99\x92",
4444
      "\xea\x99\x91"     => "\xea\x99\x90",
4445
      "\xea\x99\x8f"     => "\xea\x99\x8e",
4446
      "\xea\x99\x8d"     => "\xea\x99\x8c",
4447
      "\xea\x99\x8b"     => "\xea\x99\x8a",
4448
      "\xea\x99\x89"     => "\xea\x99\x88",
4449
      "\xea\x99\x87"     => "\xea\x99\x86",
4450
      "\xea\x99\x85"     => "\xea\x99\x84",
4451
      "\xea\x99\x83"     => "\xea\x99\x82",
4452
      "\xea\x99\x81"     => "\xea\x99\x80",
4453
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
4454
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
4455
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
4456
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
4457
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
4458
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
4459
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
4460
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
4461
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
4462
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
4463
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
4464
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
4465
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
4466
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
4467
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
4468
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
4469
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
4470
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
4471
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
4472
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
4473
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
4474
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
4475
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
4476
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
4477
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
4478
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
4479
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
4480
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
4481
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
4482
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
4483
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
4484
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
4485
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
4486
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
4487
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
4488
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
4489
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
4490
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
4491
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
4492
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
4493
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
4494
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
4495
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
4496
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
4497
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
4498
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
4499
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
4500
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
4501
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
4502
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
4503
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
4504
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
4505
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
4506
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
4507
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
4508
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
4509
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
4510
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
4511
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
4512
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
4513
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
4514
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
4515
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
4516
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
4517
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
4518
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
4519
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
4520
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
4521
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
4522
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
4523
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
4524
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
4525
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
4526
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
4527
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
4528
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
4529
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
4530
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
4531
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
4532
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
4533
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
4534
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
4535
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
4536
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
4537
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
4538
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
4539
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
4540
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
4541
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
4542
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
4543
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
4544
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
4545
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
4546
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
4547
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
4548
      "\xe2\xb1\xa6"     => "\xc8\xbe",
4549
      "\xe2\xb1\xa5"     => "\xc8\xba",
4550
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
4551
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
4552
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
4553
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
4554
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
4555
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
4556
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
4557
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
4558
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
4559
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
4560
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
4561
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
4562
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
4563
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
4564
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
4565
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
4566
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
4567
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
4568
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
4569
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
4570
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
4571
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
4572
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
4573
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
4574
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
4575
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
4576
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
4577
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
4578
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
4579
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
4580
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
4581
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
4582
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
4583
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
4584
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
4585
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
4586
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
4587
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
4588
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
4589
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
4590
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
4591
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
4592
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
4593
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
4594
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
4595
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
4596
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
4597
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
4598
      "\xe2\x86\x84"     => "\xe2\x86\x83",
4599
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
4600
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
4601
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
4602
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
4603
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
4604
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
4605
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
4606
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
4607
      "\xe1\xbe\xbe"     => "\xce\x99",
4608
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
4609
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
4610
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
4611
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
4612
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
4613
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
4614
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
4615
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
4616
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
4617
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
4618
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
4619
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
4620
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
4621
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
4622
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
4623
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
4624
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
4625
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
4626
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
4627
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
4628
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
4629
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
4630
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
4631
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
4632
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
4633
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
4634
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
4635
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
4636
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
4637
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
4638
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
4639
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
4640
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
4641
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
4642
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
4643
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
4644
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
4645
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
4646
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
4647
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
4648
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
4649
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
4650
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
4651
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
4652
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
4653
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
4654
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
4655
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
4656
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
4657
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
4658
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
4659
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
4660
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
4661
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
4662
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
4663
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
4664
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
4665
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
4666
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
4667
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
4668
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
4669
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
4670
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
4671
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
4672
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
4673
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
4674
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
4675
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
4676
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
4677
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
4678
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
4679
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
4680
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
4681
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
4682
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
4683
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
4684
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
4685
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
4686
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
4687
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
4688
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
4689
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
4690
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
4691
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
4692
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
4693
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
4694
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
4695
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
4696
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
4697
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
4698
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
4699
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
4700
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
4701
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
4702
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
4703
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
4704
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
4705
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
4706
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
4707
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
4708
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
4709
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
4710
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
4711
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
4712
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
4713
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
4714
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
4715
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
4716
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
4717
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
4718
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
4719
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
4720
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
4721
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
4722
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
4723
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
4724
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
4725
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
4726
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
4727
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
4728
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
4729
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
4730
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
4731
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
4732
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
4733
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
4734
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
4735
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
4736
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
4737
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
4738
      "\xe1\xba\xad"     => "\xe1\xba\xac",
4739
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
4740
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
4741
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
4742
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
4743
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
4744
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
4745
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
4746
      "\xe1\xba\x95"     => "\xe1\xba\x94",
4747
      "\xe1\xba\x93"     => "\xe1\xba\x92",
4748
      "\xe1\xba\x91"     => "\xe1\xba\x90",
4749
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
4750
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
4751
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
4752
      "\xe1\xba\x89"     => "\xe1\xba\x88",
4753
      "\xe1\xba\x87"     => "\xe1\xba\x86",
4754
      "\xe1\xba\x85"     => "\xe1\xba\x84",
4755
      "\xe1\xba\x83"     => "\xe1\xba\x82",
4756
      "\xe1\xba\x81"     => "\xe1\xba\x80",
4757
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
4758
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
4759
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
4760
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
4761
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
4762
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
4763
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
4764
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
4765
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
4766
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
4767
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
4768
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
4769
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
4770
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
4771
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
4772
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
4773
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
4774
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
4775
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
4776
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
4777
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
4778
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
4779
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
4780
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
4781
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
4782
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
4783
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
4784
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
4785
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
4786
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
4787
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
4788
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
4789
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
4790
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
4791
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
4792
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
4793
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
4794
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
4795
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
4796
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
4797
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
4798
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
4799
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
4800
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
4801
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
4802
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
4803
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
4804
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
4805
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
4806
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
4807
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
4808
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
4809
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
4810
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
4811
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
4812
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
4813
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
4814
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
4815
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
4816
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
4817
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
4818
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
4819
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
4820
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
4821
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
4822
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
4823
      "\xd6\x86"         => "\xd5\x96",
4824
      "\xd6\x85"         => "\xd5\x95",
4825
      "\xd6\x84"         => "\xd5\x94",
4826
      "\xd6\x83"         => "\xd5\x93",
4827
      "\xd6\x82"         => "\xd5\x92",
4828
      "\xd6\x81"         => "\xd5\x91",
4829
      "\xd6\x80"         => "\xd5\x90",
4830
      "\xd5\xbf"         => "\xd5\x8f",
4831
      "\xd5\xbe"         => "\xd5\x8e",
4832
      "\xd5\xbd"         => "\xd5\x8d",
4833
      "\xd5\xbc"         => "\xd5\x8c",
4834
      "\xd5\xbb"         => "\xd5\x8b",
4835
      "\xd5\xba"         => "\xd5\x8a",
4836
      "\xd5\xb9"         => "\xd5\x89",
4837
      "\xd5\xb8"         => "\xd5\x88",
4838
      "\xd5\xb7"         => "\xd5\x87",
4839
      "\xd5\xb6"         => "\xd5\x86",
4840
      "\xd5\xb5"         => "\xd5\x85",
4841
      "\xd5\xb4"         => "\xd5\x84",
4842
      "\xd5\xb3"         => "\xd5\x83",
4843
      "\xd5\xb2"         => "\xd5\x82",
4844
      "\xd5\xb1"         => "\xd5\x81",
4845
      "\xd5\xb0"         => "\xd5\x80",
4846
      "\xd5\xaf"         => "\xd4\xbf",
4847
      "\xd5\xae"         => "\xd4\xbe",
4848
      "\xd5\xad"         => "\xd4\xbd",
4849
      "\xd5\xac"         => "\xd4\xbc",
4850
      "\xd5\xab"         => "\xd4\xbb",
4851
      "\xd5\xaa"         => "\xd4\xba",
4852
      "\xd5\xa9"         => "\xd4\xb9",
4853
      "\xd5\xa8"         => "\xd4\xb8",
4854
      "\xd5\xa7"         => "\xd4\xb7",
4855
      "\xd5\xa6"         => "\xd4\xb6",
4856
      "\xd5\xa5"         => "\xd4\xb5",
4857
      "\xd5\xa4"         => "\xd4\xb4",
4858
      "\xd5\xa3"         => "\xd4\xb3",
4859
      "\xd5\xa2"         => "\xd4\xb2",
4860
      "\xd5\xa1"         => "\xd4\xb1",
4861
      "\xd4\xa5"         => "\xd4\xa4",
4862
      "\xd4\xa3"         => "\xd4\xa2",
4863
      "\xd4\xa1"         => "\xd4\xa0",
4864
      "\xd4\x9f"         => "\xd4\x9e",
4865
      "\xd4\x9d"         => "\xd4\x9c",
4866
      "\xd4\x9b"         => "\xd4\x9a",
4867
      "\xd4\x99"         => "\xd4\x98",
4868
      "\xd4\x97"         => "\xd4\x96",
4869
      "\xd4\x95"         => "\xd4\x94",
4870
      "\xd4\x93"         => "\xd4\x92",
4871
      "\xd4\x91"         => "\xd4\x90",
4872
      "\xd4\x8f"         => "\xd4\x8e",
4873
      "\xd4\x8d"         => "\xd4\x8c",
4874
      "\xd4\x8b"         => "\xd4\x8a",
4875
      "\xd4\x89"         => "\xd4\x88",
4876
      "\xd4\x87"         => "\xd4\x86",
4877
      "\xd4\x85"         => "\xd4\x84",
4878
      "\xd4\x83"         => "\xd4\x82",
4879
      "\xd4\x81"         => "\xd4\x80",
4880
      "\xd3\xbf"         => "\xd3\xbe",
4881
      "\xd3\xbd"         => "\xd3\xbc",
4882
      "\xd3\xbb"         => "\xd3\xba",
4883
      "\xd3\xb9"         => "\xd3\xb8",
4884
      "\xd3\xb7"         => "\xd3\xb6",
4885
      "\xd3\xb5"         => "\xd3\xb4",
4886
      "\xd3\xb3"         => "\xd3\xb2",
4887
      "\xd3\xb1"         => "\xd3\xb0",
4888
      "\xd3\xaf"         => "\xd3\xae",
4889
      "\xd3\xad"         => "\xd3\xac",
4890
      "\xd3\xab"         => "\xd3\xaa",
4891
      "\xd3\xa9"         => "\xd3\xa8",
4892
      "\xd3\xa7"         => "\xd3\xa6",
4893
      "\xd3\xa5"         => "\xd3\xa4",
4894
      "\xd3\xa3"         => "\xd3\xa2",
4895
      "\xd3\xa1"         => "\xd3\xa0",
4896
      "\xd3\x9f"         => "\xd3\x9e",
4897
      "\xd3\x9d"         => "\xd3\x9c",
4898
      "\xd3\x9b"         => "\xd3\x9a",
4899
      "\xd3\x99"         => "\xd3\x98",
4900
      "\xd3\x97"         => "\xd3\x96",
4901
      "\xd3\x95"         => "\xd3\x94",
4902
      "\xd3\x93"         => "\xd3\x92",
4903
      "\xd3\x91"         => "\xd3\x90",
4904
      "\xd3\x8f"         => "\xd3\x80",
4905
      "\xd3\x8e"         => "\xd3\x8d",
4906
      "\xd3\x8c"         => "\xd3\x8b",
4907
      "\xd3\x8a"         => "\xd3\x89",
4908
      "\xd3\x88"         => "\xd3\x87",
4909
      "\xd3\x86"         => "\xd3\x85",
4910
      "\xd3\x84"         => "\xd3\x83",
4911
      "\xd3\x82"         => "\xd3\x81",
4912
      "\xd2\xbf"         => "\xd2\xbe",
4913
      "\xd2\xbd"         => "\xd2\xbc",
4914
      "\xd2\xbb"         => "\xd2\xba",
4915
      "\xd2\xb9"         => "\xd2\xb8",
4916
      "\xd2\xb7"         => "\xd2\xb6",
4917
      "\xd2\xb5"         => "\xd2\xb4",
4918
      "\xd2\xb3"         => "\xd2\xb2",
4919
      "\xd2\xb1"         => "\xd2\xb0",
4920
      "\xd2\xaf"         => "\xd2\xae",
4921
      "\xd2\xad"         => "\xd2\xac",
4922
      "\xd2\xab"         => "\xd2\xaa",
4923
      "\xd2\xa9"         => "\xd2\xa8",
4924
      "\xd2\xa7"         => "\xd2\xa6",
4925
      "\xd2\xa5"         => "\xd2\xa4",
4926
      "\xd2\xa3"         => "\xd2\xa2",
4927
      "\xd2\xa1"         => "\xd2\xa0",
4928
      "\xd2\x9f"         => "\xd2\x9e",
4929
      "\xd2\x9d"         => "\xd2\x9c",
4930
      "\xd2\x9b"         => "\xd2\x9a",
4931
      "\xd2\x99"         => "\xd2\x98",
4932
      "\xd2\x97"         => "\xd2\x96",
4933
      "\xd2\x95"         => "\xd2\x94",
4934
      "\xd2\x93"         => "\xd2\x92",
4935
      "\xd2\x91"         => "\xd2\x90",
4936
      "\xd2\x8f"         => "\xd2\x8e",
4937
      "\xd2\x8d"         => "\xd2\x8c",
4938
      "\xd2\x8b"         => "\xd2\x8a",
4939
      "\xd2\x81"         => "\xd2\x80",
4940
      "\xd1\xbf"         => "\xd1\xbe",
4941
      "\xd1\xbd"         => "\xd1\xbc",
4942
      "\xd1\xbb"         => "\xd1\xba",
4943
      "\xd1\xb9"         => "\xd1\xb8",
4944
      "\xd1\xb7"         => "\xd1\xb6",
4945
      "\xd1\xb5"         => "\xd1\xb4",
4946
      "\xd1\xb3"         => "\xd1\xb2",
4947
      "\xd1\xb1"         => "\xd1\xb0",
4948
      "\xd1\xaf"         => "\xd1\xae",
4949
      "\xd1\xad"         => "\xd1\xac",
4950
      "\xd1\xab"         => "\xd1\xaa",
4951
      "\xd1\xa9"         => "\xd1\xa8",
4952
      "\xd1\xa7"         => "\xd1\xa6",
4953
      "\xd1\xa5"         => "\xd1\xa4",
4954
      "\xd1\xa3"         => "\xd1\xa2",
4955
      "\xd1\xa1"         => "\xd1\xa0",
4956
      "\xd1\x9f"         => "\xd0\x8f",
4957
      "\xd1\x9e"         => "\xd0\x8e",
4958
      "\xd1\x9d"         => "\xd0\x8d",
4959
      "\xd1\x9c"         => "\xd0\x8c",
4960
      "\xd1\x9b"         => "\xd0\x8b",
4961
      "\xd1\x9a"         => "\xd0\x8a",
4962
      "\xd1\x99"         => "\xd0\x89",
4963
      "\xd1\x98"         => "\xd0\x88",
4964
      "\xd1\x97"         => "\xd0\x87",
4965
      "\xd1\x96"         => "\xd0\x86",
4966
      "\xd1\x95"         => "\xd0\x85",
4967
      "\xd1\x94"         => "\xd0\x84",
4968
      "\xd1\x93"         => "\xd0\x83",
4969
      "\xd1\x92"         => "\xd0\x82",
4970
      "\xd1\x91"         => "\xd0\x81",
4971
      "\xd1\x90"         => "\xd0\x80",
4972
      "\xd1\x8f"         => "\xd0\xaf",
4973
      "\xd1\x8e"         => "\xd0\xae",
4974
      "\xd1\x8d"         => "\xd0\xad",
4975
      "\xd1\x8c"         => "\xd0\xac",
4976
      "\xd1\x8b"         => "\xd0\xab",
4977
      "\xd1\x8a"         => "\xd0\xaa",
4978
      "\xd1\x89"         => "\xd0\xa9",
4979
      "\xd1\x88"         => "\xd0\xa8",
4980
      "\xd1\x87"         => "\xd0\xa7",
4981
      "\xd1\x86"         => "\xd0\xa6",
4982
      "\xd1\x85"         => "\xd0\xa5",
4983
      "\xd1\x84"         => "\xd0\xa4",
4984
      "\xd1\x83"         => "\xd0\xa3",
4985
      "\xd1\x82"         => "\xd0\xa2",
4986
      "\xd1\x81"         => "\xd0\xa1",
4987
      "\xd1\x80"         => "\xd0\xa0",
4988
      "\xd0\xbf"         => "\xd0\x9f",
4989
      "\xd0\xbe"         => "\xd0\x9e",
4990
      "\xd0\xbd"         => "\xd0\x9d",
4991
      "\xd0\xbc"         => "\xd0\x9c",
4992
      "\xd0\xbb"         => "\xd0\x9b",
4993
      "\xd0\xba"         => "\xd0\x9a",
4994
      "\xd0\xb9"         => "\xd0\x99",
4995
      "\xd0\xb8"         => "\xd0\x98",
4996
      "\xd0\xb7"         => "\xd0\x97",
4997
      "\xd0\xb6"         => "\xd0\x96",
4998
      "\xd0\xb5"         => "\xd0\x95",
4999
      "\xd0\xb4"         => "\xd0\x94",
5000
      "\xd0\xb3"         => "\xd0\x93",
5001
      "\xd0\xb2"         => "\xd0\x92",
5002
      "\xd0\xb1"         => "\xd0\x91",
5003
      "\xd0\xb0"         => "\xd0\x90",
5004
      "\xcf\xbb"         => "\xcf\xba",
5005
      "\xcf\xb8"         => "\xcf\xb7",
5006
      "\xcf\xb5"         => "\xce\x95",
5007
      "\xcf\xb2"         => "\xcf\xb9",
5008
      "\xcf\xb1"         => "\xce\xa1",
5009
      "\xcf\xb0"         => "\xce\x9a",
5010
      "\xcf\xaf"         => "\xcf\xae",
5011
      "\xcf\xad"         => "\xcf\xac",
5012
      "\xcf\xab"         => "\xcf\xaa",
5013
      "\xcf\xa9"         => "\xcf\xa8",
5014
      "\xcf\xa7"         => "\xcf\xa6",
5015
      "\xcf\xa5"         => "\xcf\xa4",
5016
      "\xcf\xa3"         => "\xcf\xa2",
5017
      "\xcf\xa1"         => "\xcf\xa0",
5018
      "\xcf\x9f"         => "\xcf\x9e",
5019
      "\xcf\x9d"         => "\xcf\x9c",
5020
      "\xcf\x9b"         => "\xcf\x9a",
5021
      "\xcf\x99"         => "\xcf\x98",
5022
      "\xcf\x97"         => "\xcf\x8f",
5023
      "\xcf\x96"         => "\xce\xa0",
5024
      "\xcf\x95"         => "\xce\xa6",
5025
      "\xcf\x91"         => "\xce\x98",
5026
      "\xcf\x90"         => "\xce\x92",
5027
      "\xcf\x8e"         => "\xce\x8f",
5028
      "\xcf\x8d"         => "\xce\x8e",
5029
      "\xcf\x8c"         => "\xce\x8c",
5030
      "\xcf\x8b"         => "\xce\xab",
5031
      "\xcf\x8a"         => "\xce\xaa",
5032
      "\xcf\x89"         => "\xce\xa9",
5033
      "\xcf\x88"         => "\xce\xa8",
5034
      "\xcf\x87"         => "\xce\xa7",
5035
      "\xcf\x86"         => "\xce\xa6",
5036
      "\xcf\x85"         => "\xce\xa5",
5037
      "\xcf\x84"         => "\xce\xa4",
5038
      "\xcf\x83"         => "\xce\xa3",
5039
      "\xcf\x82"         => "\xce\xa3",
5040
      "\xcf\x81"         => "\xce\xa1",
5041
      "\xcf\x80"         => "\xce\xa0",
5042
      "\xce\xbf"         => "\xce\x9f",
5043
      "\xce\xbe"         => "\xce\x9e",
5044
      "\xce\xbd"         => "\xce\x9d",
5045
      "\xce\xbc"         => "\xce\x9c",
5046
      "\xce\xbb"         => "\xce\x9b",
5047
      "\xce\xba"         => "\xce\x9a",
5048
      "\xce\xb9"         => "\xce\x99",
5049
      "\xce\xb8"         => "\xce\x98",
5050
      "\xce\xb7"         => "\xce\x97",
5051
      "\xce\xb6"         => "\xce\x96",
5052
      "\xce\xb5"         => "\xce\x95",
5053
      "\xce\xb4"         => "\xce\x94",
5054
      "\xce\xb3"         => "\xce\x93",
5055
      "\xce\xb2"         => "\xce\x92",
5056
      "\xce\xb1"         => "\xce\x91",
5057
      "\xce\xaf"         => "\xce\x8a",
5058
      "\xce\xae"         => "\xce\x89",
5059
      "\xce\xad"         => "\xce\x88",
5060
      "\xce\xac"         => "\xce\x86",
5061
      "\xcd\xbd"         => "\xcf\xbf",
5062
      "\xcd\xbc"         => "\xcf\xbe",
5063
      "\xcd\xbb"         => "\xcf\xbd",
5064
      "\xcd\xb7"         => "\xcd\xb6",
5065
      "\xcd\xb3"         => "\xcd\xb2",
5066
      "\xcd\xb1"         => "\xcd\xb0",
5067
      "\xca\x92"         => "\xc6\xb7",
5068
      "\xca\x8c"         => "\xc9\x85",
5069
      "\xca\x8b"         => "\xc6\xb2",
5070
      "\xca\x8a"         => "\xc6\xb1",
5071
      "\xca\x89"         => "\xc9\x84",
5072
      "\xca\x88"         => "\xc6\xae",
5073
      "\xca\x83"         => "\xc6\xa9",
5074
      "\xca\x80"         => "\xc6\xa6",
5075
      "\xc9\xbd"         => "\xe2\xb1\xa4",
5076
      "\xc9\xb5"         => "\xc6\x9f",
5077
      "\xc9\xb2"         => "\xc6\x9d",
5078
      "\xc9\xb1"         => "\xe2\xb1\xae",
5079
      "\xc9\xaf"         => "\xc6\x9c",
5080
      "\xc9\xab"         => "\xe2\xb1\xa2",
5081
      "\xc9\xa9"         => "\xc6\x96",
5082
      "\xc9\xa8"         => "\xc6\x97",
5083
      "\xc9\xa5"         => "\xea\x9e\x8d",
5084
      "\xc9\xa3"         => "\xc6\x94",
5085
      "\xc9\xa0"         => "\xc6\x93",
5086
      "\xc9\x9b"         => "\xc6\x90",
5087
      "\xc9\x99"         => "\xc6\x8f",
5088
      "\xc9\x97"         => "\xc6\x8a",
5089
      "\xc9\x96"         => "\xc6\x89",
5090
      "\xc9\x94"         => "\xc6\x86",
5091
      "\xc9\x93"         => "\xc6\x81",
5092
      "\xc9\x92"         => "\xe2\xb1\xb0",
5093
      "\xc9\x91"         => "\xe2\xb1\xad",
5094
      "\xc9\x90"         => "\xe2\xb1\xaf",
5095
      "\xc9\x8f"         => "\xc9\x8e",
5096
      "\xc9\x8d"         => "\xc9\x8c",
5097
      "\xc9\x8b"         => "\xc9\x8a",
5098
      "\xc9\x89"         => "\xc9\x88",
5099
      "\xc9\x87"         => "\xc9\x86",
5100
      "\xc9\x82"         => "\xc9\x81",
5101
      "\xc9\x80"         => "\xe2\xb1\xbf",
5102
      "\xc8\xbf"         => "\xe2\xb1\xbe",
5103
      "\xc8\xbc"         => "\xc8\xbb",
5104
      "\xc8\xb3"         => "\xc8\xb2",
5105
      "\xc8\xb1"         => "\xc8\xb0",
5106
      "\xc8\xaf"         => "\xc8\xae",
5107
      "\xc8\xad"         => "\xc8\xac",
5108
      "\xc8\xab"         => "\xc8\xaa",
5109
      "\xc8\xa9"         => "\xc8\xa8",
5110
      "\xc8\xa7"         => "\xc8\xa6",
5111
      "\xc8\xa5"         => "\xc8\xa4",
5112
      "\xc8\xa3"         => "\xc8\xa2",
5113
      "\xc8\x9f"         => "\xc8\x9e",
5114
      "\xc8\x9d"         => "\xc8\x9c",
5115
      "\xc8\x9b"         => "\xc8\x9a",
5116
      "\xc8\x99"         => "\xc8\x98",
5117
      "\xc8\x97"         => "\xc8\x96",
5118
      "\xc8\x95"         => "\xc8\x94",
5119
      "\xc8\x93"         => "\xc8\x92",
5120
      "\xc8\x91"         => "\xc8\x90",
5121
      "\xc8\x8f"         => "\xc8\x8e",
5122
      "\xc8\x8d"         => "\xc8\x8c",
5123
      "\xc8\x8b"         => "\xc8\x8a",
5124
      "\xc8\x89"         => "\xc8\x88",
5125
      "\xc8\x87"         => "\xc8\x86",
5126
      "\xc8\x85"         => "\xc8\x84",
5127
      "\xc8\x83"         => "\xc8\x82",
5128
      "\xc8\x81"         => "\xc8\x80",
5129
      "\xc7\xbf"         => "\xc7\xbe",
5130
      "\xc7\xbd"         => "\xc7\xbc",
5131
      "\xc7\xbb"         => "\xc7\xba",
5132
      "\xc7\xb9"         => "\xc7\xb8",
5133
      "\xc7\xb5"         => "\xc7\xb4",
5134
      "\xc7\xb3"         => "\xc7\xb2",
5135
      "\xc7\xaf"         => "\xc7\xae",
5136
      "\xc7\xad"         => "\xc7\xac",
5137
      "\xc7\xab"         => "\xc7\xaa",
5138
      "\xc7\xa9"         => "\xc7\xa8",
5139
      "\xc7\xa7"         => "\xc7\xa6",
5140
      "\xc7\xa5"         => "\xc7\xa4",
5141
      "\xc7\xa3"         => "\xc7\xa2",
5142
      "\xc7\xa1"         => "\xc7\xa0",
5143
      "\xc7\x9f"         => "\xc7\x9e",
5144
      "\xc7\x9d"         => "\xc6\x8e",
5145
      "\xc7\x9c"         => "\xc7\x9b",
5146
      "\xc7\x9a"         => "\xc7\x99",
5147
      "\xc7\x98"         => "\xc7\x97",
5148
      "\xc7\x96"         => "\xc7\x95",
5149
      "\xc7\x94"         => "\xc7\x93",
5150
      "\xc7\x92"         => "\xc7\x91",
5151
      "\xc7\x90"         => "\xc7\x8f",
5152
      "\xc7\x8e"         => "\xc7\x8d",
5153
      "\xc7\x8c"         => "\xc7\x8b",
5154
      "\xc7\x89"         => "\xc7\x88",
5155
      "\xc7\x86"         => "\xc7\x85",
5156
      "\xc6\xbf"         => "\xc7\xb7",
5157
      "\xc6\xbd"         => "\xc6\xbc",
5158
      "\xc6\xb9"         => "\xc6\xb8",
5159
      "\xc6\xb6"         => "\xc6\xb5",
5160
      "\xc6\xb4"         => "\xc6\xb3",
5161
      "\xc6\xb0"         => "\xc6\xaf",
5162
      "\xc6\xad"         => "\xc6\xac",
5163
      "\xc6\xa8"         => "\xc6\xa7",
5164
      "\xc6\xa5"         => "\xc6\xa4",
5165
      "\xc6\xa3"         => "\xc6\xa2",
5166
      "\xc6\xa1"         => "\xc6\xa0",
5167
      "\xc6\x9e"         => "\xc8\xa0",
5168
      "\xc6\x9a"         => "\xc8\xbd",
5169
      "\xc6\x99"         => "\xc6\x98",
5170
      "\xc6\x95"         => "\xc7\xb6",
5171
      "\xc6\x92"         => "\xc6\x91",
5172
      "\xc6\x8c"         => "\xc6\x8b",
5173
      "\xc6\x88"         => "\xc6\x87",
5174
      "\xc6\x85"         => "\xc6\x84",
5175
      "\xc6\x83"         => "\xc6\x82",
5176
      "\xc6\x80"         => "\xc9\x83",
5177
      "\xc5\xbf"         => "\x53",
5178
      "\xc5\xbe"         => "\xc5\xbd",
5179
      "\xc5\xbc"         => "\xc5\xbb",
5180
      "\xc5\xba"         => "\xc5\xb9",
5181
      "\xc5\xb7"         => "\xc5\xb6",
5182
      "\xc5\xb5"         => "\xc5\xb4",
5183
      "\xc5\xb3"         => "\xc5\xb2",
5184
      "\xc5\xb1"         => "\xc5\xb0",
5185
      "\xc5\xaf"         => "\xc5\xae",
5186
      "\xc5\xad"         => "\xc5\xac",
5187
      "\xc5\xab"         => "\xc5\xaa",
5188
      "\xc5\xa9"         => "\xc5\xa8",
5189
      "\xc5\xa7"         => "\xc5\xa6",
5190
      "\xc5\xa5"         => "\xc5\xa4",
5191
      "\xc5\xa3"         => "\xc5\xa2",
5192
      "\xc5\xa1"         => "\xc5\xa0",
5193
      "\xc5\x9f"         => "\xc5\x9e",
5194
      "\xc5\x9d"         => "\xc5\x9c",
5195
      "\xc5\x9b"         => "\xc5\x9a",
5196
      "\xc5\x99"         => "\xc5\x98",
5197
      "\xc5\x97"         => "\xc5\x96",
5198 1
      "\xc5\x95"         => "\xc5\x94",
5199
      "\xc5\x93"         => "\xc5\x92",
5200 1
      "\xc5\x91"         => "\xc5\x90",
5201
      "\xc5\x8f"         => "\xc5\x8e",
5202
      "\xc5\x8d"         => "\xc5\x8c",
5203
      "\xc5\x8b"         => "\xc5\x8a",
5204
      "\xc5\x88"         => "\xc5\x87",
5205
      "\xc5\x86"         => "\xc5\x85",
5206
      "\xc5\x84"         => "\xc5\x83",
5207
      "\xc5\x82"         => "\xc5\x81",
5208
      "\xc5\x80"         => "\xc4\xbf",
5209
      "\xc4\xbe"         => "\xc4\xbd",
5210
      "\xc4\xbc"         => "\xc4\xbb",
5211
      "\xc4\xba"         => "\xc4\xb9",
5212
      "\xc4\xb7"         => "\xc4\xb6",
5213
      "\xc4\xb5"         => "\xc4\xb4",
5214
      "\xc4\xb3"         => "\xc4\xb2",
5215 1
      "\xc4\xb1"         => "\x49",
5216
      "\xc4\xaf"         => "\xc4\xae",
5217
      "\xc4\xad"         => "\xc4\xac",
5218
      "\xc4\xab"         => "\xc4\xaa",
5219
      "\xc4\xa9"         => "\xc4\xa8",
5220
      "\xc4\xa7"         => "\xc4\xa6",
5221
      "\xc4\xa5"         => "\xc4\xa4",
5222
      "\xc4\xa3"         => "\xc4\xa2",
5223
      "\xc4\xa1"         => "\xc4\xa0",
5224
      "\xc4\x9f"         => "\xc4\x9e",
5225
      "\xc4\x9d"         => "\xc4\x9c",
5226
      "\xc4\x9b"         => "\xc4\x9a",
5227
      "\xc4\x99"         => "\xc4\x98",
5228
      "\xc4\x97"         => "\xc4\x96",
5229
      "\xc4\x95"         => "\xc4\x94",
5230
      "\xc4\x93"         => "\xc4\x92",
5231 1
      "\xc4\x91"         => "\xc4\x90",
5232
      "\xc4\x8f"         => "\xc4\x8e",
5233 1
      "\xc4\x8d"         => "\xc4\x8c",
5234 1
      "\xc4\x8b"         => "\xc4\x8a",
5235
      "\xc4\x89"         => "\xc4\x88",
5236 1
      "\xc4\x87"         => "\xc4\x86",
5237
      "\xc4\x85"         => "\xc4\x84",
5238
      "\xc4\x83"         => "\xc4\x82",
5239
      "\xc4\x81"         => "\xc4\x80",
5240
      "\xc3\xbf"         => "\xc5\xb8",
5241
      "\xc3\xbe"         => "\xc3\x9e",
5242
      "\xc3\xbd"         => "\xc3\x9d",
5243
      "\xc3\xbc"         => "\xc3\x9c",
5244
      "\xc3\xbb"         => "\xc3\x9b",
5245
      "\xc3\xba"         => "\xc3\x9a",
5246
      "\xc3\xb9"         => "\xc3\x99",
5247 8
      "\xc3\xb8"         => "\xc3\x98",
5248
      "\xc3\xb6"         => "\xc3\x96",
5249 8
      "\xc3\xb5"         => "\xc3\x95",
5250
      "\xc3\xb4"         => "\xc3\x94",
5251
      "\xc3\xb3"         => "\xc3\x93",
5252
      "\xc3\xb2"         => "\xc3\x92",
5253
      "\xc3\xb1"         => "\xc3\x91",
5254
      "\xc3\xb0"         => "\xc3\x90",
5255
      "\xc3\xaf"         => "\xc3\x8f",
5256
      "\xc3\xae"         => "\xc3\x8e",
5257
      "\xc3\xad"         => "\xc3\x8d",
5258
      "\xc3\xac"         => "\xc3\x8c",
5259
      "\xc3\xab"         => "\xc3\x8b",
5260 7
      "\xc3\xaa"         => "\xc3\x8a",
5261
      "\xc3\xa9"         => "\xc3\x89",
5262 7
      "\xc3\xa8"         => "\xc3\x88",
5263 2
      "\xc3\xa7"         => "\xc3\x87",
5264
      "\xc3\xa6"         => "\xc3\x86",
5265
      "\xc3\xa5"         => "\xc3\x85",
5266
      "\xc3\xa4"         => "\xc3\x84",
5267 6
      "\xc3\xa3"         => "\xc3\x83",
5268 6
      "\xc3\xa2"         => "\xc3\x82",
5269
      "\xc3\xa1"         => "\xc3\x81",
5270 6
      "\xc3\xa0"         => "\xc3\x80",
5271 1
      "\xc2\xb5"         => "\xce\x9c",
5272 1
      "\x7a"             => "\x5a",
5273 6
      "\x79"             => "\x59",
5274
      "\x78"             => "\x58",
5275
      "\x77"             => "\x57",
5276 6
      "\x76"             => "\x56",
5277
      "\x75"             => "\x55",
5278 6
      "\x74"             => "\x54",
5279
      "\x73"             => "\x53",
5280
      "\x72"             => "\x52",
5281
      "\x71"             => "\x51",
5282 1
      "\x70"             => "\x50",
5283 1
      "\x6f"             => "\x4f",
5284 1
      "\x6e"             => "\x4e",
5285 6
      "\x6d"             => "\x4d",
5286 6
      "\x6c"             => "\x4c",
5287 6
      "\x6b"             => "\x4b",
5288 6
      "\x6a"             => "\x4a",
5289 6
      "\x69"             => "\x49",
5290
      "\x68"             => "\x48",
5291 6
      "\x67"             => "\x47",
5292
      "\x66"             => "\x46",
5293
      "\x65"             => "\x45",
5294
      "\x64"             => "\x44",
5295
      "\x63"             => "\x43",
5296
      "\x62"             => "\x42",
5297
      "\x61"             => "\x41",
5298
5299
    );
5300
5301
    return $case;
5302
  }
5303
5304 1
  /**
   * Translate characters or replace sub-strings.
   *
   * Two call forms are supported:
   * - strtr($s, $from, $to): both lists are split via "self::str_split()" and the entry at position n of $from
   *   is replaced by the entry at position n of $to (the longer list is cut to the length of the shorter one).
   * - strtr($s, $pairs): $from is an array of "search => replacement" pairs that is passed to the native "strtr()".
   *
   * @param string       $s    The string being translated.
   * @param string|array $from The characters to replace, or an array of replace-pairs if $to is omitted.
   * @param string       $to   The replacement characters; keep the default (INF) to use the replace-pairs form.
   *
   * @return string|false The translated string; false (plus a warning) if $to is omitted and $from is not an array.
   */
  public static function strtr($s, $from, $to = INF)
  {
    if (INF !== $to) {
      $from = self::str_split($from);
      $to = self::str_split($to);
      $pairCount = min(count($from), count($to));

      if ($pairCount === 0) {
        // nothing to translate, and array_combine() rejects empty arrays before PHP 5.4
        return (string)$s;
      }

      // cut both lists to the same length, so that array_combine() gets matching arrays
      $from = array_combine(
          array_slice($from, 0, $pairCount),
          array_slice($to, 0, $pairCount)
      );
    } elseif (!is_array($from)) {
      // the two-argument form of the native strtr() only accepts an array of replace-pairs
      trigger_error(__METHOD__ . '(): $from must be an array if $to is omitted', E_USER_WARNING);

      return false;
    }

    return strtr($s, $from);
  }
5332
5333
  /**
   * Compare a slice of one string with another string.
   *
   * The slice of $main_str is taken via "self::substr()" (starting at $offset, at most $length characters long),
   * $str is cut to the character-length of that slice and both parts are then compared.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison, it is passed to "self::substr()".
   * @param int     $length             The maximum length of the comparison.
   * @param boolean $case_insensitivity If this is exactly TRUE, the comparison is done via "self::strcasecmp()",
   *                                    otherwise via "self::strcmp()".
   *
   * @return int The result of the used comparison method.
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5353
5354
  /**
   * Compare two strings without regard to case.
   *
   * Both strings are case-folded via "self::strtocasefold()" and then compared via "self::strcmp()".
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5366
5367
  /**
   * Uppercase the first character of every word in the string.
   *
   * Words are separated by a single space (" "). A word that is listed in $exceptions (strict comparison)
   * is left untouched. The first character of the whole result is upper-cased in any case.
   *
   * @param string $string     The input string.
   * @param array  $exceptions Words which should not be changed.
   *
   * @return string The converted string, or an empty string for empty input.
   */
  public static function ucwords($string, $exceptions = array())
  {
    // "0" is falsy in PHP, but it is still a valid (non-empty) input
    if (!$string && !is_numeric($string)) {
      return '';
    }

    // init
    $words = explode(' ', $string);
    $newwords = array();
    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
5408
5409
  /**
   * Format a number with grouped thousands.
   *
   * Separators that are longer than one byte (e.g. multi-byte UTF-8 characters) are inserted by this method
   * itself, so the result does not depend on the multi-byte support of the native "number_format()".
   *
   * @param float  $number        The number being formatted.
   * @param int    $decimals      The number of decimal points.
   * @param string $dec_point     The separator for the decimal point.
   * @param string $thousands_sep The thousands separator.
   *
   * @return string The formatted number.
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // Format with ASCII placeholders and swap both of them in a single pass: strtr() never re-scans
      // its own replacements, so a "," that was inserted as decimal point is not replaced again
      // by the thousands separator.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
5439 10
5440 10
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: this is only a wrapper for the native "str_replace()", all arguments are passed through unchanged.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for (the needle),
   *                       an array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value for the found search values,
   *                       an array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on (the haystack).
   *                       If subject is an array, the replacement is done for every entry
   *                       and the return value is an array as well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed A string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
5474
5475
  /**
   * Case-insensitive (and UTF-8 aware) version of "str_replace()".
   *
   * Every search value is converted into a quoted, case-insensitive regular expression and the replacement
   * is done via "preg_replace()". The replacement values are inserted literally: "$" and "\" have no
   * special meaning.
   *
   * @param mixed $search  The value being searched for, an array may be used to designate multiple needles.
   * @param mixed $replace The replacement value, an array may be used to designate multiple replacements.
   * @param mixed $subject The string or array being searched and replaced on.
   * @param int   $count   [optional] If passed, this will hold the number of replacements.
   *
   * @return mixed The result of "preg_replace()": a string or an array with the replaced values,
   *               or null if the regular expression engine reports an error.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // an empty needle must never match: this pattern cannot match anything
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() treats "$n" and "\n" in the replacement as back-references, so escape them
    $escapePairs = array(
        '\\' => '\\\\',
        '$'  => '\\$',
    );

    if (is_array($replace)) {
      $replacements = array();
      foreach ($replace as $replacement) {
        $replacements[] = strtr((string)$replacement, $escapePairs);
      }
    } else {
      $replacements = strtr((string)$replace, $escapePairs);
    }

    $subject = preg_replace($patterns, $replacements, $subject, -1, $replacedCount);
    $count = $replacedCount;

    return $subject;
  }
5503
5504
  /**
   * Convert the first character of a string to lowercase.
   *
   * @param    string $str The input string
   *
   * @return   string The lower-cased first character, followed by the unchanged rest of the string
   */
  public static function lcfirst($str)
  {
    $firstChar = self::strtolower(self::substr($str, 0, 1));
    $rest = self::substr($str, 1);

    return $firstChar . $rest;
  }
5515
5516
  /**
   * Find the position of the last occurrence of a case-insensitive string.
   *
   * Both strings are lower-cased via "self::strtolower()", the search itself is done by "self::strrpos()".
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int|false The result of "self::strrpos()" for the lower-cased strings
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $haystackLower = self::strtolower($haystack);
    $needleLower = self::strtolower($needle);

    return self::strrpos($haystackLower, $needleLower, $offset);
  }
5529
5530
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string  $haystack     <p>
   *                              The string being checked, for the last occurrence
   *                              of needle
   *                              </p>
   * @param string  $needle       <p>
   *                              The string to find in haystack. The value is always cast to a string,
   *                              an integer is therefore searched as its decimal representation.
   *                              </p>
   * @param int     $offset       [optional] May be specified to begin searching an arbitrary number of characters into
   *                              the string. Negative values will stop searching at an arbitrary point
   *                              prior to the end of the string.
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string
   *
   * @return int|false the numeric position (counted in characters) of
   * the last occurrence of needle in the
   * haystack string. If haystack or needle is empty or if
   * needle is not found, it returns false.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // fallback
    //
    // INFO: there is intentionally no "grapheme_strrpos()" shortcut here: that function belongs to "intl"
    // (not to "iconv") and it counts grapheme clusters, so its result can differ from "mb_strrpos()".

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    // byte-wise search, the byte position is converted into a character position afterwards
    $pos = strrpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    $left = substr($haystack, 0, $pos);

    // a negative offset only shortens the haystack, it does not shift the position
    return ($offset > 0 ? $offset : 0) + self::strlen($left);
  }
5603
5604
  /**
   * Split a string into smaller chunks and join these chunks with the specified line ending.
   *
   * The chunks are created by "self::split()", $end is placed between the chunks via "implode()".
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
5618
5619
  /**
   * Alias for "UTF8::to_win1252()": the input is passed through unchanged.
   *
   * @param   string $string
   *
   * @return  array|string The result of "UTF8::to_win1252()"
   */
  public static function to_iso8859($string)
  {
    $converted = self::to_win1252($string);

    return $converted;
  }
5630
5631
  /**
   * Fix UTF-8 strings that contain wrongly converted Windows-1252 chars.
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as if it was ISO8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards the string to "UTF8::fix_simple_utf8()".
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $string
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($string)
  {
    $fixed = self::fix_simple_utf8($string);

    return $fixed;
  }
5648
5649
  /**
   * Get the list of Unicode White Space characters (the static "$whitespace" property of this class).
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespaceChars = self::$whitespace;

    return $whitespaceChars;
  }
5658
5659
  /**
   * Parse a query string into variables.
   *
   * WARNING: This differs from parse_str() by returning the results
   *    via the second parameter instead of placing them in the local scope!
   *
   * The input is passed through "self::filter()" before it is parsed by "mb_parse_str()".
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string.
   *                        </p>
   * @param array  $result  <p>
   *                        The parsed variables are stored in this variable as array elements.
   *                        </p>
   *
   * @return void
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    $filteredStr = self::filter($str);

    mb_parse_str($filteredStr, $result);
  }
5686
5687 2
  /**
   * Get the text direction of a specific character.
   *
   * The character is converted into its code point via "static::chr_to_decimal()" and this code point
   * is looked up in a fixed list of right-to-left code points and code point ranges.
   *
   * @param   string $chr Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($chr)
  {
    // single right-to-left code points (compared strictly)
    static $rtlCodePoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // inclusive right-to-left code point ranges: array(first, last)
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    $c = static::chr_to_decimal($chr);

    // every listed code point lies between 0x5be and 0x10b7f, anything else is left-to-right
    if (0x5be > $c || 0x10b7f < $c) {
      return 'LTR';
    }

    if (in_array($c, $rtlCodePoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
5786
5787
  /**
   * Get the decimal code representation of a specific character.
   *
   * The value of the first byte (read via "self::ord()") decides how many bytes belong to the character,
   * the payload bits of the following bytes are then appended, 6 bits per byte.
   *
   * @param   string $chr The input character
   *
   * @return  int
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;
    $code = self::ord($chr[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx: single byte, the value is already the result
      return $code;
    }

    $byteCount = 1;

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx: two bytes
      $byteCount = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) == 0xe0) {
      // 1110xxxx: three bytes
      $byteCount = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx: four bytes
      $byteCount = 4;
      $code &= ~0xf0;
    }

    for ($index = 1; $index < $byteCount; ++$index) {
      // 10xxxxxx: drop the marker bit and append the rest
      $code = ($code << 6) + (self::ord($chr[$index]) & ~0x80);
    }

    return $code;
  }
5826
5827
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as hexadecimal HTML entity ("&#x...;") and this entity
   * is converted into UTF-8 via "mb_convert_encoding()".
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
5844
5845
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The keys are the percent-encoded bytes "%20" - "%FF", the values are the matching characters.
   *
   * NOTE(review): some values ("%7F", "%81", "%8D", "%8F", "%90", "%9D", "%A0", "%AD") show up as empty
   * strings here, they may hold invisible characters in the real file -> please verify.
   *
   * @return array
   */
  protected static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // EURO SIGN, same mapping as 128 in "$win1252ToUtf8" (this was a backtick before)
        '%80' => "\xe2\x82\xac",
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6081
6082
}
6083