Completed
Branch master (215c22)
by Lars
04:19
created

UTF8::utf8_encode()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 19
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 3

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 19
ccs 11
cts 11
cp 1
rs 9.4285
cc 3
eloc 11
nc 3
nop 1
crap 3
1
<?php
2
3
namespace voku\helper;
4
5
use Patchwork\PHP\Shim\Intl;
6
use Patchwork\PHP\Shim\Normalizer;
7
use Patchwork\PHP\Shim\Xml;
8
9
/**
10
 * UTF8-Helper-Class
11
 *
12
 * @package voku\helper
13
 */
14
class UTF8
15
{
16
  /**
17
   * @var array
18
   */
19
  protected static $win1252ToUtf8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
  );
48
49
  /**
50
   * @var array
51
   */
52
  protected static $cp1252ToUtf8 = array(
53
      '€' => '€',
54
      '‚' => '‚',
55
      'ƒ' => 'ƒ',
56
      '„' => '„',
57
      '…' => '…',
58
      '†' => '†',
59
      '‡' => '‡',
60
      'ˆ' => 'ˆ',
61
      '‰' => '‰',
62
      'Š' => 'Š',
63
      '‹' => '‹',
64
      'Œ' => 'Œ',
65
      'Ž' => 'Ž',
66
      '‘' => '‘',
67
      '’' => '’',
68
      '“' => '“',
69
      '”' => '”',
70
      '•' => '•',
71
      '–' => '–',
72
      '—' => '—',
73
      '˜' => '˜',
74
      '™' => '™',
75
      'š' => 'š',
76
      '›' => '›',
77
      'œ' => 'œ',
78
      'ž' => 'ž',
79
      'Ÿ' => 'Ÿ',
80
  );
81
82
  /**
83
   * Numeric Code Point => UTF-8 Character
84
   *
85
   * @var array
86
   */
87
  protected static $whitespace = array(
88
      0     => "\x0",
89
      //NUL Byte
90
      9     => "\x9",
91
      //Tab
92
      10    => "\xa",
93
      //New Line
94
      11    => "\xb",
95
      //Vertical Tab
96
      13    => "\xd",
97
      //Carriage Return
98
      32    => "\x20",
99
      //Ordinary Space
100
      160   => "\xc2\xa0",
101
      //NO-BREAK SPACE
102
      5760  => "\xe1\x9a\x80",
103
      //OGHAM SPACE MARK
104
      6158  => "\xe1\xa0\x8e",
105
      //MONGOLIAN VOWEL SEPARATOR
106
      8192  => "\xe2\x80\x80",
107
      //EN QUAD
108
      8193  => "\xe2\x80\x81",
109
      //EM QUAD
110
      8194  => "\xe2\x80\x82",
111
      //EN SPACE
112
      8195  => "\xe2\x80\x83",
113
      //EM SPACE
114
      8196  => "\xe2\x80\x84",
115
      //THREE-PER-EM SPACE
116
      8197  => "\xe2\x80\x85",
117
      //FOUR-PER-EM SPACE
118
      8198  => "\xe2\x80\x86",
119
      //SIX-PER-EM SPACE
120
      8199  => "\xe2\x80\x87",
121
      //FIGURE SPACE
122
      8200  => "\xe2\x80\x88",
123
      //PUNCTUATION SPACE
124
      8201  => "\xe2\x80\x89",
125
      //THIN SPACE
126
      8202  => "\xe2\x80\x8a",
127
      //HAIR SPACE
128
      8232  => "\xe2\x80\xa8",
129
      //LINE SEPARATOR
130
      8233  => "\xe2\x80\xa9",
131
      //PARAGRAPH SEPARATOR
132
      8239  => "\xe2\x80\xaf",
133
      //NARROW NO-BREAK SPACE
134
      8287  => "\xe2\x81\x9f",
135
      //MEDIUM MATHEMATICAL SPACE
136
      12288 => "\xe3\x80\x80"
137
      //IDEOGRAPHIC SPACE
138
  );
139
140
  /**
141
   * @var array
142
   */
143
  protected static $whitespaceTable = array(
144
      'SPACE'                     => "\x20",
145
      'NO-BREAK SPACE'            => "\xc2\xa0",
146
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
147
      'EN QUAD'                   => "\xe2\x80\x80",
148
      'EM QUAD'                   => "\xe2\x80\x81",
149
      'EN SPACE'                  => "\xe2\x80\x82",
150
      'EM SPACE'                  => "\xe2\x80\x83",
151
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
152
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
153
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
154
      'FIGURE SPACE'              => "\xe2\x80\x87",
155
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
156
      'THIN SPACE'                => "\xe2\x80\x89",
157
      'HAIR SPACE'                => "\xe2\x80\x8a",
158
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
159
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
160
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
161
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  protected static $commonCaseFold = array(
168
      'ſ'            => 's',
169
      "\xCD\x85"     => 'ι',
170
      'ς'            => 'σ',
171
      "\xCF\x90"     => 'β',
172
      "\xCF\x91"     => 'θ',
173
      "\xCF\x95"     => 'φ',
174
      "\xCF\x96"     => 'π',
175
      "\xCF\xB0"     => 'κ',
176
      "\xCF\xB1"     => 'ρ',
177
      "\xCF\xB5"     => 'ε',
178
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
179
      "\xE1\xBE\xBE" => 'ι',
180
  );
181
182
  /**
183
   * @var array
184
   */
185
  protected static $brokenUtf8ToUtf8 = array(
186
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
187
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
188
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
189
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
190
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
191
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
192
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
193
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
194
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
195
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
196
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
197
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
198
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
199
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
200
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
201
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
202
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
203
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
204
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
205
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
206
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
207
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
208
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
209
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
210
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
211
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
212
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
213
      'ü'       => 'ü',
214
      'ä'       => 'ä',
215
      'ö'       => 'ö',
216
      'Ö'       => 'Ö',
217
      'ß'       => 'ß',
218
      'Ã '       => 'à',
219
      'á'       => 'á',
220
      'â'       => 'â',
221
      'ã'       => 'ã',
222
      'ù'       => 'ù',
223
      'ú'       => 'ú',
224
      'û'       => 'û',
225
      'Ù'       => 'Ù',
226
      'Ú'       => 'Ú',
227
      'Û'       => 'Û',
228
      'Ü'       => 'Ü',
229
      'ò'       => 'ò',
230
      'ó'       => 'ó',
231
      'ô'       => 'ô',
232
      'è'       => 'è',
233
      'é'       => 'é',
234
      'ê'       => 'ê',
235
      'ë'       => 'ë',
236
      'À'       => 'À',
237
      'Á'       => 'Á',
238
      'Â'       => 'Â',
239
      'Ã'       => 'Ã',
240
      'Ä'       => 'Ä',
241
      'Ã…'       => 'Å',
242
      'Ç'       => 'Ç',
243
      'È'       => 'È',
244
      'É'       => 'É',
245
      'Ê'       => 'Ê',
246
      'Ë'       => 'Ë',
247
      'ÃŒ'       => 'Ì',
248
      'Í'       => 'Í',
249
      'ÃŽ'       => 'Î',
250
      'Ï'       => 'Ï',
251
      'Ñ'       => 'Ñ',
252
      'Ã’'       => 'Ò',
253
      'Ó'       => 'Ó',
254
      'Ô'       => 'Ô',
255
      'Õ'       => 'Õ',
256
      'Ø'       => 'Ø',
257
      'Ã¥'       => 'å',
258
      'æ'       => 'æ',
259
      'ç'       => 'ç',
260
      'ì'       => 'ì',
261
      'í'       => 'í',
262
      'î'       => 'î',
263
      'ï'       => 'ï',
264
      'ð'       => 'ð',
265
      'ñ'       => 'ñ',
266
      'õ'       => 'õ',
267
      'ø'       => 'ø',
268
      'ý'       => 'ý',
269
      'ÿ'       => 'ÿ',
270
      '€'      => '€',
271
  );
272
273
  /**
274
   * @var array
275
   */
276
  protected static $utf8ToWin1252 = array(
277
      "\xe2\x82\xac" => "\x80", // EURO SIGN
278
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
279
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
280
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
281
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
282
      "\xe2\x80\xa0" => "\x86", // DAGGER
283
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
284
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
285
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
286
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
287
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
288
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
289
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
290
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
291
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
292
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
293
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
294
      "\xe2\x80\xa2" => "\x95", // BULLET
295
      "\xe2\x80\x93" => "\x96", // EN DASH
296
      "\xe2\x80\x94" => "\x97", // EM DASH
297
      "\xcb\x9c"     => "\x98", // SMALL TILDE
298
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
299
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
300
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
301
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
302
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
303
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
304
  );
305
306
  /**
307
   * @var array
308
   */
309
  protected static $utf8MSWord = array(
310
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
311
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
312
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
313
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
314
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
315
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
316
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
317
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
318
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
319
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
320
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
321
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
322
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
323
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
324
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
325
  );
326
327
  /**
328
   * @var array
329
   */
330
  private static $support = array();
331
332
  /**
   * Instantiating the helper runs the UTF-8 support detection
   * (see checkForSupport()), exactly as a static call would.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
339
340
  /**
   * Detect the UTF-8 related capabilities of the runtime and boot the portability layer.
   *
   * The detection results are stored in self::$support. The presence of the
   * "mbstring" entry marks the detection as done, so later calls return immediately.
   */
  public static function checkForSupport()
  {
    // nothing to do when an earlier call already filled the support cache
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();

    Bootup::initAll(); // Enables the portability layer and configures PHP for UTF-8
    Bootup::filterRequestUri(); // Redirects to an UTF-8 encoded URL if it's not already the case
    Bootup::filterRequestInputs(); // Normalizes HTTP inputs to UTF-8 NFC
  }
357
358
  /**
   * Tells whether the "mbstring" extension is loaded.
   *
   * Side effect: when the extension is present, its internal encoding
   * is switched to UTF-8.
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    $isLoaded = extension_loaded('mbstring');

    if ($isLoaded) {
      mb_internal_encoding('UTF-8');
    }

    return $isLoaded;
  }
373
374
  /**
   * Tells whether the "iconv" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean
    $isLoaded = extension_loaded('iconv');

    return $isLoaded;
  }
383
384
  /**
   * Tells whether the "intl" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean
    $isLoaded = extension_loaded('intl');

    return $isLoaded;
  }
393
394
  /**
   * Tells whether PCRE accepts the "/u" pattern modifier (Unicode mode).
   *
   * An empty pattern with the modifier is matched against an empty string;
   * any warning raised by the probe is suppressed.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    // preg_match() gives 1 on a match, 0 or false otherwise
    return (bool)$probe;
  }
404
405
  /**
   * camelCase alias for "UTF8::to_ascii()"
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Forwarded unchanged to to_ascii()
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
417
418
  /**
   * convert to ASCII
   *
   * The input is cleaned with self::clean(); if it then still contains bytes
   * >= 0x80 it is NFKC-normalized and every multi-byte character is replaced by,
   * in this order of preference: the iconv transliteration, an entry of the
   * "translit_extra" data file, the first byte of its NFD form when that byte is
   * ASCII, $subst_chr, and finally (only if the result is still "?") the result
   * of self::str_transliterate().
   *
   * NOTE(review): relies on the ICONV_IMPL constant and iconv(), i.e. presumably
   * on the iconv extension or a shim providing them - confirm.
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Replacement for characters without an ASCII equivalent
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // pure ASCII input needs no transliteration at all
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = Normalizer::normalize($s, Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // Split into single characters. The "s" modifier is required: without it "."
    // does not match line breaks and they would be missing from the result.
    preg_match_all('/./us', $s, $matches);
    $chars = isset($matches[0]) ? $matches[0] : array();

    /** @noinspection AlterInForeachInspection */
    foreach ($chars as &$c) {

      // single-byte characters are ASCII already
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if (is_string($t)) {
          if (!isset($t[0])) {
            $t = '?';
          } elseif (isset($t[1])) {
            // strip the accent marks some iconv implementations put in front
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // iconv() returns false on failure; treat that as "not transliterable" so the
      // fallbacks below run instead of the character silently disappearing
      if (!is_string($t)) {
        $t = '?';
      }

      if ('?' === $t) {

        // the extra transliteration table is loaded lazily, once
        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = Normalizer::normalize($c, Normalizer::NFD);

          // keep the base letter of the decomposition when it is plain ASCII
          if (is_string($t) && isset($t[0]) && $t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      // last resort, reached only when the result is still a literal "?"
      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
497
498
  /**
   * accepts a string and removes all non-UTF-8 characters from it.
   *
   * Well-formed one- to four-byte UTF-8 sequences are kept, every other byte is
   * dropped. Afterwards U+FFFD replacement characters and invisible control
   * characters are removed, then the optional normalizations run in this order:
   * whitespace, MS Word characters, BOM removal. Each option is applied only
   * when the corresponding flag is exactly true.
   *
   * @param string $str              The string to be sanitized.
   * @param bool   $remove_bom       set true, to strip a leading byte order mark
   * @param bool   $normalize_whitespace set true, to replace special whitespace by a plain space
   * @param bool   $normalize_msword e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings
    // (the repetition below is capped at 50, presumably to avoid exactly that)

    $regx = '/
       (
        (?: [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
        |   [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
        |   \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xE1-\xEC][\x80-\xBF]{2}
        |   \xED[\x80-\x9F][\x80-\xBF]
        |   [\xEE-\xEF][\x80-\xBF]{2}
        |   \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences     11110xxx 10xxxxxx * 3
        |   [\xF1-\xF3][\x80-\xBF]{3}
        |   \xF4[\x80-\x8F][\x80-\xBF]{2}
        ){1,50}                          # ...one or more times
       )
       | .                                  # anything else
       /x';
    // valid runs are captured in group 1 and re-inserted, everything else vanishes
    $str = preg_replace($regx, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
545
546
  /**
   * replace diamond question mark (�)
   *
   * Both the escaped byte sequence and the literal character are searched for;
   * each occurrence is replaced by $unknown.
   *
   * @param string $str
   * @param string $unknown Replacement text (default "?")
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // one replacement entry per needle, all of them $unknown
    $replacements = array_fill(0, count($needles), $unknown);

    return str_replace($needles, $replacements, $str);
  }
568
569
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded set false, to leave percent-encoded control characters untouched
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // the hex digits of a percent-encoding may be upper or lower case ("%0B" equals "%0b"),
      // hence the case-insensitive match
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is removed any more: deleting one sequence can join
    // the remains into a new one (e.g. "%%0000" -> "%00" -> "")
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
602
603
  /**
   * normalize whitespace
   *
   * Every character listed in self::$whitespaceTable is replaced by an ordinary
   * space. The search list is built once per variant and cached.
   *
   * @param string $str The string to be normalized.
   * @param bool   $keepNonBreakingSpace set true, to keep non-breaking-spaces
   *                                     (only the exact value true is honoured)
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false)
  {
    static $whitespaces = array();

    // The cache slot is derived from the same strict check that decides the table
    // content. Using the raw argument as key let loosely equal values (1 and true)
    // share a slot, so the first caller decided the behavior for both.
    $cacheKey = ($keepNonBreakingSpace === true) ? 1 : 0;

    if (!isset($whitespaces[$cacheKey])) {

      $table = self::$whitespaceTable;

      if ($cacheKey === 1) {
        unset($table['NO-BREAK SPACE']);
      }

      // str_replace() only needs the characters, not their names
      $whitespaces[$cacheKey] = array_values($table);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
629
630
  /**
   * returns an array with all utf8 whitespace characters as per
   * http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author Derek E.
   *
   * @return array the content of self::$whitespaceTable: whitespace characters as values,
   *         the type of whitespace as keys, as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
643
644
  /**
   * normalize MS Word Special Chars
   *
   * Replaces every key of self::$utf8MSWord found in the string by its mapped
   * value. The search/replace lists are built on first use and then reused.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array();
      $replace = array();

      foreach (self::$utf8MSWord as $from => $to) {
        $search[] = $from;
        $replace[] = $to;
      }
    }

    return str_replace($search, $replace, $str);
  }
663
664
  /**
   * remove the BOM from UTF-8 / UTF-16 / UTF-32
   *
   * The marks are tested at the start of the string one after another, in the
   * order listed below. The tests are not mutually exclusive: after one mark has
   * been cut off, the remaining marks are still tested against the new start.
   *
   * @param string $str
   *
   * @return string
   */
  public static function removeBOM($str = '')
  {
    // byte order mark => its length in bytes; the order matters, because the
    // UTF-32 (LE) mark begins with the UTF-16 (LE) mark
    $marks = array(
        array("\x00\x00\xfe\xff", 4), // UTF-32 (BE)
        array("\xff\xfe\x00\x00", 4), // UTF-32 (LE)
        array("\xef\xbb\xbf", 3),     // UTF-8
        array("\xfe\xff", 2),         // UTF-16 (BE)
        array("\xff\xfe", 2),         // UTF-16 (LE)
    );

    foreach ($marks as $mark) {
      $bytes = $mark[0];
      $length = $mark[1];

      if (substr($str, 0, $length) === $bytes) {
        $str = substr($str, $length);
      }
    }

    return $str;
  }
701
702
  /**
   * get data
   *
   * Loads "<directory of this file>/data/<$file>.ser" and returns its
   * unserialized content.
   *
   * NOTE(review): unserialize() must only ever see trusted files; the name is
   * presumably always an internal constant - confirm at the call sites.
   *
   * @param string $file Base name of the data file, without directory and extension
   *
   * @return bool|string|array|int false on error
   */
  protected static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.ser';

    if (!file_exists($path)) {
      return false;
    }

    return unserialize(file_get_contents($path));
  }
718
719
  /**
720
   * US-ASCII transliterations of Unicode text
721
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
722
   * Warning: you should only pass this well formed UTF-8!
723
   * Be aware it works by making a copy of the input string which it appends transliterated
724
   * characters to - it uses a PHP output buffer to do this - it means, memory use will increase,
725
   * requiring up to the same amount again as the input string
726
   *
727
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
728
   *
729
   * @author <[email protected]>
730
   *
731
   * @param string $str     UTF-8 string to convert
732
   * @param string $unknown Character use if character unknown (default to ?)
733
   *
734
   * @return string US-ASCII string
735
   */
736 8
  public static function str_transliterate($str, $unknown = '?')
  {
    // Transliteration tables, keyed by "bank" (code point >> 8) and loaded
    // lazily from data/xNN.php. The bank files are included into this scope and
    // presumably assign into $UTF8_TO_ASCII[$bank] - do not rename this variable.
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    // split the string into single UTF-8 characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = isset($ar[0]) ? $ar[0] : array();

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Must be reset for every character: otherwise a lead byte that cannot be
      // decoded would silently reuse the code point of the previous character.
      $ord = null;

      // Decode the code point from the lead byte and its continuation bytes.
      // Lead bytes 128-191 (stray continuation) and 254-255 match no branch and
      // therefore end up as $unknown.
      if ($ordC0 >= 192 && $ordC0 <= 223 && isset($c[1])) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239 && isset($c[2])) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247 && isset($c[3])) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251 && isset($c[4])) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253 && isset($c[5])) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // upper bits select the table file, lower 8 bits the entry within it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          /** @noinspection PhpIncludeInspection */
          include $bankfile;
        }

        // missing bank file, or a bank file that did not define its table
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference so later writes to $c cannot touch the last element
    unset($c);

    return implode('', $chars);
  }
828
829
  /**
830
   * echo native UTF8-Support libs
831
   */
832
  public static function showSupport()
  {
    // one line of output per entry of the support table, separated by "\n<br>"
    foreach (self::$support as $supportEntry) {
      echo $supportEntry, "\n<br>";
    }
  }
838
839
  /**
840
   * UTF-8 version of htmlentities()
841
   *
842
   * Convert all applicable characters to HTML entities
843
   *
844
   * @link http://php.net/manual/en/function.htmlentities.php
845
   *
846
   * @param string $string        <p>
847
   *                              The input string.
848
   *                              </p>
849
   * @param int    $flags         [optional] <p>
850
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
851
   *                              invalid code unit sequences and the used document type. The default is
852
   *                              ENT_COMPAT | ENT_HTML401.
853
   *                              <table>
854
   *                              Available <i>flags</i> constants
855
   *                              <tr valign="top">
856
   *                              <td>Constant Name</td>
857
   *                              <td>Description</td>
858
   *                              </tr>
859
   *                              <tr valign="top">
860
   *                              <td><b>ENT_COMPAT</b></td>
861
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
862
   *                              </tr>
863
   *                              <tr valign="top">
864
   *                              <td><b>ENT_QUOTES</b></td>
865
   *                              <td>Will convert both double and single quotes.</td>
866
   *                              </tr>
867
   *                              <tr valign="top">
868
   *                              <td><b>ENT_NOQUOTES</b></td>
869
   *                              <td>Will leave both double and single quotes unconverted.</td>
870
   *                              </tr>
871
   *                              <tr valign="top">
872
   *                              <td><b>ENT_IGNORE</b></td>
873
   *                              <td>
874
   *                              Silently discard invalid code unit sequences instead of returning
875
   *                              an empty string. Using this flag is discouraged as it
876
   *                              may have security implications.
877
   *                              </td>
878
   *                              </tr>
879
   *                              <tr valign="top">
880
   *                              <td><b>ENT_SUBSTITUTE</b></td>
881
   *                              <td>
882
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
883
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
884
   *                              </td>
885
   *                              </tr>
886
   *                              <tr valign="top">
887
   *                              <td><b>ENT_DISALLOWED</b></td>
888
   *                              <td>
889
   *                              Replace invalid code points for the given document type with a
890
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
891
   *                              (otherwise) instead of leaving them as is. This may be useful, for
892
   *                              instance, to ensure the well-formedness of XML documents with
893
   *                              embedded external content.
894
   *                              </td>
895
   *                              </tr>
896
   *                              <tr valign="top">
897
   *                              <td><b>ENT_HTML401</b></td>
898
   *                              <td>
899
   *                              Handle code as HTML 4.01.
900
   *                              </td>
901
   *                              </tr>
902
   *                              <tr valign="top">
903
   *                              <td><b>ENT_XML1</b></td>
904
   *                              <td>
905
   *                              Handle code as XML 1.
906
   *                              </td>
907
   *                              </tr>
908
   *                              <tr valign="top">
909
   *                              <td><b>ENT_XHTML</b></td>
910
   *                              <td>
911
   *                              Handle code as XHTML.
912
   *                              </td>
913
   *                              </tr>
914
   *                              <tr valign="top">
915
   *                              <td><b>ENT_HTML5</b></td>
916
   *                              <td>
917
   *                              Handle code as HTML 5.
918
   *                              </td>
919
   *                              </tr>
920
   *                              </table>
921
   *                              </p>
922
   * @param string $encoding      [optional] <p>
923
   *                              Like <b>htmlspecialchars</b>,
924
   *                              <b>htmlentities</b> takes an optional third argument
925
   *                              <i>encoding</i> which defines encoding used in
926
   *                              conversion.
927
   *                              Although this argument is technically optional, you are highly
928
   *                              encouraged to specify the correct value for your code.
929
   *                              </p>
930
   * @param bool   $double_encode [optional] <p>
931
   *                              When <i>double_encode</i> is turned off PHP will not
932
   *                              encode existing html entities. The default is to convert everything.
933
   *                              </p>
934
   *
935
   *
936
   * @return string the encoded string.
937
   * </p>
938
   * <p>
939
   * If the input <i>string</i> contains an invalid code unit
940
   * sequence within the given <i>encoding</i> an empty string
941
   * will be returned, unless either the <b>ENT_IGNORE</b> or
942
   * <b>ENT_SUBSTITUTE</b> flags are set.
943
   */
944 2
  public static function htmlentities($string, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Same as the native function, except that the encoding defaults to UTF-8.
    $encoded = htmlentities($string, $flags, $encoding, $double_encode);

    return $encoded;
  }
948
949
  /**
950
   * UTF-8 version of htmlspecialchars()
951
   *
952
   * Convert special characters to HTML entities
953
   *
954
   * @link http://php.net/manual/en/function.htmlspecialchars.php
955
   *
956
   * @param string $string        <p>
957
   *                              The string being converted.
958
   *                              </p>
959
   * @param int    $flags         [optional] <p>
960
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
961
   *                              invalid code unit sequences and the used document type. The default is
962
   *                              ENT_COMPAT | ENT_HTML401.
963
   *                              <table>
964
   *                              Available <i>flags</i> constants
965
   *                              <tr valign="top">
966
   *                              <td>Constant Name</td>
967
   *                              <td>Description</td>
968
   *                              </tr>
969
   *                              <tr valign="top">
970
   *                              <td><b>ENT_COMPAT</b></td>
971
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
972
   *                              </tr>
973
   *                              <tr valign="top">
974
   *                              <td><b>ENT_QUOTES</b></td>
975
   *                              <td>Will convert both double and single quotes.</td>
976
   *                              </tr>
977
   *                              <tr valign="top">
978
   *                              <td><b>ENT_NOQUOTES</b></td>
979
   *                              <td>Will leave both double and single quotes unconverted.</td>
980
   *                              </tr>
981
   *                              <tr valign="top">
982
   *                              <td><b>ENT_IGNORE</b></td>
983
   *                              <td>
984
   *                              Silently discard invalid code unit sequences instead of returning
985
   *                              an empty string. Using this flag is discouraged as it
986
   *                              may have security implications.
987
   *                              </td>
988
   *                              </tr>
989
   *                              <tr valign="top">
990
   *                              <td><b>ENT_SUBSTITUTE</b></td>
991
   *                              <td>
992
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
993
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
994
   *                              </td>
995
   *                              </tr>
996
   *                              <tr valign="top">
997
   *                              <td><b>ENT_DISALLOWED</b></td>
998
   *                              <td>
999
   *                              Replace invalid code points for the given document type with a
1000
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
1001
   *                              (otherwise) instead of leaving them as is. This may be useful, for
1002
   *                              instance, to ensure the well-formedness of XML documents with
1003
   *                              embedded external content.
1004
   *                              </td>
1005
   *                              </tr>
1006
   *                              <tr valign="top">
1007
   *                              <td><b>ENT_HTML401</b></td>
1008
   *                              <td>
1009
   *                              Handle code as HTML 4.01.
1010
   *                              </td>
1011
   *                              </tr>
1012
   *                              <tr valign="top">
1013
   *                              <td><b>ENT_XML1</b></td>
1014
   *                              <td>
1015
   *                              Handle code as XML 1.
1016
   *                              </td>
1017
   *                              </tr>
1018
   *                              <tr valign="top">
1019
   *                              <td><b>ENT_XHTML</b></td>
1020
   *                              <td>
1021
   *                              Handle code as XHTML.
1022
   *                              </td>
1023
   *                              </tr>
1024
   *                              <tr valign="top">
1025
   *                              <td><b>ENT_HTML5</b></td>
1026
   *                              <td>
1027
   *                              Handle code as HTML 5.
1028
   *                              </td>
1029
   *                              </tr>
1030
   *                              </table>
1031
   *                              </p>
1032
   * @param string $encoding      [optional] <p>
1033
   *                              Defines encoding used in conversion.
1034
   *                              </p>
1035
   *                              <p>
1036
   *                              For the purposes of this function, the encodings
1037
   *                              ISO-8859-1, ISO-8859-15,
1038
   *                              UTF-8, cp866,
1039
   *                              cp1251, cp1252, and
1040
   *                              KOI8-R are effectively equivalent, provided the
1041
   *                              <i>string</i> itself is valid for the encoding, as
1042
   *                              the characters affected by <b>htmlspecialchars</b> occupy
1043
   *                              the same positions in all of these encodings.
1044
   *                              </p>
1045
   * @param bool   $double_encode [optional] <p>
1046
   *                              When <i>double_encode</i> is turned off PHP will not
1047
   *                              encode existing html entities, the default is to convert everything.
1048
   *                              </p>
1049
   *
1050
   * @return string The converted string.
1051
   * </p>
1052
   * <p>
1053
   * If the input <i>string</i> contains an invalid code unit
1054
   * sequence within the given <i>encoding</i> an empty string
1055
   * will be returned, unless either the <b>ENT_IGNORE</b> or
1056
   * <b>ENT_SUBSTITUTE</b> flags are set.
1057
   */
1058 1
  public static function htmlspecialchars($string, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Same as the native function, except that the encoding defaults to UTF-8.
    $converted = htmlspecialchars($string, $flags, $encoding, $double_encode);

    return $converted;
  }
1062
1063
  /**
1064
   * alias for "UTF8::is_utf8"
1065
   *
1066
   * @param string $str
1067
   *
1068
   * @return bool
1069
   */
1070 16
  public static function isUtf8($str)
  {
    // camelCase alias: the actual check lives in is_utf8()
    $isValid = self::is_utf8($str);

    return $isValid;
  }
1074
1075
  /**
1076
   * checks whether the passed string contains only byte sequences that
1077
   * appear valid UTF-8 characters.
1078
   *
1079
   * @see    http://hsivonen.iki.fi/php-utf8/
1080
   *
1081
   * @since  1.0
1082
   *
1083
   * @param    string $str The string to be checked
1084
   *
1085
   * @return   bool True if the check succeeds, False Otherwise
1086
   */
1087 31
  public static function is_utf8($str)
  {
    $str = (string)$str;

    // an empty string is considered valid
    if (!isset($str[0])) {
      return true;
    }

    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      //
      // NOTE(review): this branch runs when pcre_utf8_support() does NOT
      // report true, yet it relies on the /u modifier - the condition looks
      // inverted; confirm against pcre_utf8_support() before changing it.
      return (preg_match('/^.{1}/us', $str, $ar) == 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState == 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 == (0x80 & ($in))) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 == (0xE0 & ($in))) {
          // First octet of 2 octet sequence
          $mUcs4 = ($in);
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 == (0xF0 & ($in))) {
          // First octet of 3 octet sequence
          $mUcs4 = ($in);
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 == (0xF8 & ($in))) {
          // First octet of 4 octet sequence
          $mUcs4 = ($in);
          $mUcs4 = ($mUcs4 & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 == (0xFC & ($in))) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in);
          $mUcs4 = ($mUcs4 & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC == (0xFE & ($in))) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in);
          $mUcs4 = ($mUcs4 & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 == (0xC0 & ($in))) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $tmp = $in;
          $tmp = ($tmp & 0x0000003F) << $shift;
          $mUcs4 |= $tmp;

          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode codepoint to be output
           */
          if (0 == --$mState) {
            /*
             * Check for illegal sequences and codepoints.
             */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                ((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal
                (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                // Codepoints outside the Unicode range are illegal
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }

            //initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A sequence that is still open when the input ends is truncated (e.g. a
    // lone lead byte as the last octet) and therefore not valid UTF-8.
    return 0 === $mState;
  }
1206
1207
  /**
1208
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
1209
   * mask.
1210
   *
1211
   * @param string $s
1212
   * @param string $mask
1213
   * @param int    $start
1214
   * @param int    $len
1215
   *
1216
   * @return int|null
1217
   */
1218 6
  public static function strspn($s, $mask, $start = 0, $len = 2147483647)
  {
    // Only cut the subject down when the caller asked for a sub-range.
    $wantsSlice = $start || 2147483647 != $len;
    if ($wantsSlice) {
      $s = self::substr($s, $start, $len);
    }

    // Anchor at the start and take the longest run of characters from the mask.
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $s, $leadingRun)) {
      return 0;
    }

    return self::strlen($leadingRun[0]);
  }
1226
1227
  /**
1228
   * Get part of string
1229
   *
1230
   * @link http://php.net/manual/en/function.mb-substr.php
1231
   *
1232
   * @param string  $str       <p>
1233
   *                           The string being checked.
1234
   *                           </p>
1235
   * @param int     $start     <p>
1236
   *                           The first position used in str.
1237
   *                           </p>
1238
   * @param int     $length    [optional] <p>
1239
   *                           The maximum length of the returned string.
1240
   *                           </p>
1241
   * @param string  $encoding
1242
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
1243
   *
1244
   * @return string mb_substr returns the portion of
1245
   * str specified by the start and length parameters.
1246
   */
1247 37
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // result of the one-time probe for PHP bug #62759 in grapheme_substr()
    static $hasBug62759;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    // iconv and mbstring are not tolerant to invalid encoding
    // further, their behaviour is inconsistent with that of PHP's substr
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // no explicit length: take everything up to the end of the string
    $length = ($length === null) ? (int)self::strlen($str) : (int)$length;

    if (self::$support['mbstring'] === true) {
      // INFO: this is only a fallback for old versions
      if (is_bool($encoding)) {
        $encoding = 'UTF-8';
      }

      return mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      if (null === $hasBug62759) {
        $hasBug62759 = ('à' === grapheme_substr('éà', 1, -2));
      }

      $part = $hasBug62759
          ? Intl::grapheme_substr_workaround62759($str, $start, $length)
          : grapheme_substr($str, $start, $length);

      return (string)$part;
    }

    // fallback: split into an array of characters, extract the relevant
    // part and join it to make a string again
    $characters = self::split($str);

    return implode(array_slice($characters, $start, $length));
  }
1304
1305
  /**
1306
   * Get string length
1307
   *
1308
   * @link     http://php.net/manual/en/function.mb-strlen.php
1309
   *
1310
   * @param string  $string    The string being checked for length.
1311
   * @param string  $encoding  Set the charset for e.g. "mb_" function
1312
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
1313
   *
1314
   * @return int the number of characters in
1315
   *           string str having character encoding
1316
   *           encoding. A multi-byte character is
1317
   *           counted as 1.
1318
   */
1319 63
  public static function strlen($string, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $string = (string)$string;

    if ('' === $string) {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    // cleaning is only applied to UTF-8 input, and only on request
    if ($encoding === 'UTF-8' && $cleanUtf8 === true) {
      $string = self::clean($string);
    }

    return mb_strlen($string, $encoding);
  }
1343
1344
  /**
1345
   * convert a string to an array of Unicode characters.
1346
   *
1347
   * @param    string  $str       The string to split into array.
1348
   * @param    int     $length    Max character length of each array element
1349
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string
1350
   *
1351
   * @return   array An array containing chunks of the string
1352
   */
1353 24
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ('' === $str) {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // with /u the dot consumes one whole character, /s lets it match newlines too
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: walk the bytes and collect well-formed 1- to 4-byte sequences,
      // bytes that do not start such a sequence are skipped

      $byteCount = strlen($str);

      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // single byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && (isset($str[$pos + 1]))) {
          // lead byte of a 2-byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && (isset($str[$pos + 2]))) {
          // lead byte of a 3-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && (isset($str[$pos + 3]))) {
          // lead byte of a 4-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    // regroup the single characters into chunks of $length characters
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
1421
1422
  /**
1423
   * rxClass
1424
   *
1425
   * @param string $s
1426
   * @param string $class
1427
   *
1428
   * @return string
1429
   */
1430 40
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    // The key is prefixed with the byte length of $s: a plain "$s . $class" is
    // ambiguous, e.g. ("\\", "s") and ("", "\\s") would share one cache entry
    // although they produce different character classes.
    $cacheKey = strlen((string)$s) . ':' . $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // $class[0] collects the bracket expression, further entries are
    // alternatives that cannot be expressed inside the brackets
    $class = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen goes to the front so it is not read as a range operator
        $class[0] = '-' . $class[0];
      } elseif (!isset($char[2])) {
        // at most two bytes: escape for use inside a "/"-delimited pattern
        $class[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // one multi-byte character
        $class[0] .= $char;
      } else {
        // cluster of several characters: becomes its own alternative
        $class[] = $char;
      }
    }

    $class[0] = '[' . $class[0] . ']';

    if (1 === count($class)) {
      $return = $class[0];
    } else {
      $return = '(?:' . implode('|', $class) . ')';
    }

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
1466
1467
  /**
   * Split a string into an array of graphemes, optionally grouped into chunks.
   *
   * @param string $string The input string.
   * @param int    $len    Number of graphemes per array element; values below 1 are
   *                       passed on to the native str_split().
   *
   * @return array
   */
  public static function str_split($string, $len = 1)
  {
    self::checkForSupport();

    $len = (int)$len;
    if ($len < 1) {
      // let the native function deal with an invalid chunk length
      return str_split($string, func_get_arg(1));
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $offset = 0;
      $byteLength = strlen($string);

      // grapheme_extract() writes the start of the next grapheme back into $offset
      while ($offset < $byteLength) {
        $graphemes[] = grapheme_extract($string, 1, GRAPHEME_EXTR_COUNT, $offset, $offset);
      }
    } else {
      preg_match_all('/' . GRAPHEME_CLUSTER_RX . '/u', $string, $matches);
      $graphemes = $matches[0];
    }

    if ($len === 1) {
      return $graphemes;
    }

    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        // every $len-th grapheme opens a new chunk
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
1516
1517
  /**
   * Return the display width of a string as computed by mb_strwidth() for UTF-8.
   *
   * @param string $s
   *
   * @return int
   */
  public static function strwidth($s)
  {
    self::checkForSupport();

    $width = mb_strwidth($s, 'UTF-8');

    return $width;
  }
1531
1532
  /**
   * Find the length of the initial segment that contains none of the given characters.
   *
   * @param string $str      The string to examine.
   * @param string $charlist The characters that end the segment.
   * @param int    $start    Start of the examined part of $str.
   * @param int    $len      Length of the examined part of $str.
   *
   * @return int|null The segment length, or null when $charlist is empty.
   */
  public static function strcspn($str, $charlist, $start = 0, $len = 2147483647)
  {
    $charlist .= '';
    if ('' === $charlist) {
      return null;
    }

    // only cut the string when a non-default window was requested
    $useWindow = $start || 2147483647 != $len;
    $str = $useWindow ? (string)self::substr($str, $start, $len) : (string)$str;

    $pattern = '/^(.*?)' . self::rxClass($charlist) . '/us';

    if (!preg_match($pattern, $str, $match)) {
      // no character of the list occurs: the whole string is the segment
      return self::strlen($str);
    }

    return self::strlen($match[1]);
  }
1561
1562
  /**
   * Check that a string does not have more characters than the given limit.
   *
   * @param    string $str      The string to be checked.
   * @param    int    $box_size The maximum number of characters.
   *
   * @return   bool true if the length of $str is less than or equal to $box_size,
   *           false otherwise
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1576
1577
  /**
   * Case-insensitive search for the first occurrence of a needle (UTF-8 aware, via mb_stristr()).
   *
   * @param string $string        The string to search in.
   * @param string $needle        The string to search for.
   * @param bool   $before_needle Passed through to mb_stristr().
   *
   * @return false|string false when $needle is empty, otherwise the result of mb_stristr().
   */
  public static function stristr($string, $needle, $before_needle = false)
  {
    $needle .= '';

    if ($needle === '') {
      return false;
    }

    self::checkForSupport();

    $found = mb_stristr($string, $needle, $before_needle, 'UTF-8');

    return $found;
  }
1597
1598
  /**
   * Case-insensitive "natural order" comparison: both strings are case-folded
   * and then compared with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2, > 0 if str1 is greater than str2, 0 if they are equal.
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
1611
1612
  /**
   * "Natural order" string comparison on the folded forms produced by self::strtonatfold().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2, > 0 if str1 is greater than str2, 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    // identical strings need no folding at all
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
1625
1626
  /**
   * Case-sensitive folding for collation matching: decompose the string (NFD)
   * and remove all nonspacing marks (\p{Mn}).
   *
   * @param string $s
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = Normalizer::normalize($s, Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
1637
1638
  /**
   * Unicode transformation for caseless matching.
   *
   * Applies the replacements of self::$commonCaseFold, optionally the
   * "caseFolding_full" data set, and lowercases the result.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $string The string to fold.
   * @param bool   $full   Whether the "caseFolding_full" replacements are applied as well.
   *
   * @return string
   */
  public static function strtocasefold($string, $full = true)
  {
    // lookup tables are prepared once and reused by later calls
    static $fullFold = null;
    static $commonSearch = null;
    static $commonReplace = null;

    if (null === $commonSearch) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonSearch, $commonReplace, $string);

    if ($full) {
      if (null === $fullFold) {
        $fullFold = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullFold[0], $fullFold[1], $folded);
    }

    return self::strtolower($folded);
  }
1673
1674
  /**
   * Make a string lowercase (wrapper around mb_strtolower()).
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      The string being lowercased.
   * @param string $encoding The character encoding handed to mb_strtolower().
   *
   * @return string The lowercased string; an empty string for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    self::checkForSupport();

    $lower = mb_strtolower($str, $encoding);

    return $lower;
  }
1700
1701
  /**
   * URL-decode a string and repair the result.
   *
   * After urldecode(), "%uXXXX" sequences are turned into hexadecimal HTML
   * entities; the string is then passed through self::to_utf8(),
   * self::html_entity_decode(), rawurldecode() and self::fix_simple_utf8().
   *
   * @since 1.0.4
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // "%u20ac" -> "&#x20ac;" so that the entity decoder below can handle it
    $str = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', urldecode($str));

    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    $utf8 = self::to_utf8($str);
    $decoded = self::html_entity_decode($utf8, $flags);
    $str = self::fix_simple_utf8(rawurldecode($decoded));

    return (string)$str;
  }
1733
1734
  /**
   * Repair a broken UTF-8 string by replacing every key of
   * self::$brokenUtf8ToUtf8 with its value.
   *
   * @param string $str
   *
   * @return string
   */
  public static function fix_simple_utf8($str)
  {
    // search/replace lists are derived from the map once and reused by later calls
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    if (null === $search) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
1759
1760
  /**
   * UTF-8 version of html_entity_decode().
   *
   * The native function does not convert numeric entities that lack the
   * trailing semicolon, although most browsers interpret them. This method
   * therefore appends the missing semicolon before decoding, and repeats the
   * decoding until the string no longer changes (so "&amp;lt;" ends up as "<").
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $string   The input string.
   * @param int    $flags    [optional] Bitmask of ENT_* constants (ENT_COMPAT, ENT_QUOTES, ENT_NOQUOTES,
   *                         ENT_HTML401, ENT_XML1, ENT_XHTML, ENT_HTML5) as accepted by the native
   *                         html_entity_decode(). When null, ENT_COMPAT | ENT_HTML5 is used if
   *                         Bootup::is_php('5.4') is true, ENT_COMPAT otherwise.
   * @param string $encoding [optional] Encoding to use.
   *
   * @return string the decoded string.
   */
  public static function html_entity_decode($string, $flags = null, $encoding = 'UTF-8')
  {
    $string = (string)$string;

    // empty input, or no ampersand at all: there is nothing to decode
    if ('' === $string || strpos($string, '&') === false) {
      return $string;
    }

    if ($flags === null) {
      $flags = Bootup::is_php('5.4') === true ? ENT_COMPAT | ENT_HTML5 : ENT_COMPAT;
    }

    do {
      $previous = $string;

      // add the missing ";" to numeric (decimal and hexadecimal) entities
      $completed = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $string);

      $string = html_entity_decode($completed, $flags, $encoding);
    } while ($previous !== $string);

    return $string;
  }
1863
1864
  /**
   * Function UTF8::to_utf8
   *
   * Leaves byte sequences that already look like UTF-8 alone and converts every
   * other byte >= 0x80 to UTF-8, assuming the original encoding is either
   * Windows-1252 or ISO 8859-1. Afterwards "\uXXXX" escape sequences and decimal
   * entities of the form "&#NN;" to "&#NNNN;" are decoded as well.
   *
   * Arrays are converted element by element (recursively) and returned as arrays.
   *
   * Because valid-looking sequences are kept, a conversion is skipped when:
   *
   * 1) a byte 0xC0-0xDF (ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß) is followed by one byte 0x80-0xBF
   *    For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   *    The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   *    is also a valid unicode character, and will be left unchanged.
   * 2) a byte 0xE0-0xEF (àáâãäåæçèéêëìíîï) is followed by TWO bytes 0x80-0xBF,
   * 3) a byte 0xF0-0xF7 (ðñòóôõö÷) is followed by THREE bytes 0x80-0xBF.
   *
   * @name               to_utf8
   *
   * @param string|array $text Any string or array.
   *
   * @return string|array The same string (or array of strings), UTF8 encoded
   *
   */
  public static function to_utf8($text)
  {
    if (is_array($text)) {
      foreach ($text as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $text[$k] = self::to_utf8($v);
      }

      return $text;
    }

    $text = (string)$text;

    if (!isset($text[0])) {
      return $text;
    }

    $max = self::strlen($text, '8bit');

    $buf = '';
    for ($i = 0; $i < $max; $i++) {
      $c1 = $text[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // number of continuation bytes a UTF-8 lead byte of this value announces
        if ($c1 <= "\xdf") {
          $trailing = 1;
        } elseif ($c1 <= "\xef") {
          $trailing = 2;
        } elseif ($c1 <= "\xf7") {
          $trailing = 3;
        } else {
          $trailing = 0; // 0xF8-0xFF never starts a sequence
        }

        // all announced continuation bytes must be present and within 0x80-0xBF
        $looksLikeUtf8 = $trailing > 0;
        for ($j = 1; $looksLikeUtf8 && $j <= $trailing; $j++) {
          $cN = ($i + $j < $max) ? $text[$i + $j] : "\x00";
          $looksLikeUtf8 = ($cN >= "\x80" && $cN <= "\xbf");
        }

        if ($looksLikeUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= substr($text, $i, $trailing + 1);
          $i += $trailing;
        } else { // not valid UTF8 - encode the single byte as a two byte sequence
          // ">> 6" is an integer division by 64: chr() must not be given a fractional float
          $cc1 = (chr(ord($c1) >> 6) | "\xc0");
          $cc2 = (($c1 & "\x3f") | "\x80");
          $buf .= $cc1 . $cc2;
        }

      } elseif (($c1 & "\xc0") === "\x80") { // 0x80-0xBF outside of a sequence: needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $cc1 = (chr($ordC1 >> 6) | "\xc0");
          $cc2 = (($c1 & "\x3f") | "\x80");
          $buf .= $cc1 . $cc2;
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    // NOTE(review): the 'HTML-ENTITIES' pseudo encoding is deprecated as of PHP 8.2
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
1995
1996
  /**
   * Alias of self::to_utf8().
   *
   * @param string $text
   *
   * @return string
   */
  public static function toUTF8($text)
  {
    $converted = self::to_utf8($text);

    return $converted;
  }
2007
2008
  /**
   * Try to check whether a string is a JSON string.
   *
   * Only input that json_decode() turns into an object without reporting an
   * error is accepted; JSON arrays and scalars yield false.
   *
   * @param $string
   *
   * @return bool
   *
   * @deprecated
   */
  public static function isJson($string)
  {
    $string = (string)$string;

    if ('' === $string) {
      return false;
    }

    $decoded = json_decode($string);

    return is_object($decoded) && json_last_error() == JSON_ERROR_NONE;
  }
2035
2036
  /**
   * Return the part of the haystack from the first occurrence of the needle to
   * the end (wrapper around grapheme_strstr()).
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      The input string. Must be valid UTF-8.
   * @param string $needle        The string to look for. Must be valid UTF-8.
   * @param bool   $before_needle [optional] If TRUE, the part of the haystack before the first
   *                              occurrence of the needle (excluding the needle) is returned.
   *
   * @return string the portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    self::checkForSupport();

    $portion = grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
2060
2061
  /**
   * Read an entire file into a string and, by default, convert it to clean UTF-8.
   *
   * !!! WARNING: do not use the UTF-8 option for binary files (e.g. images or pdf),
   * the conversion would alter their content.
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      Name of the file to read.
   * @param int|bool|null $flags         [optional] Any truthy value (e.g. FILE_USE_INCLUDE_PATH) makes the
   *                                     native function search the include path for the file.
   * @param resource|null $context       [optional] A context resource created with stream_context_create().
   *                                     When null and $timeout is non-zero, a context carrying the
   *                                     http timeout is created.
   * @param int|null      $offset        [optional] The offset where the reading starts; null means 0.
   * @param int|null      $maxlen        [optional] Maximum length of data read; only applied when it is
   *                                     an integer, otherwise the file is read until its end.
   * @param int           $timeout       Timeout in seconds for the "http" stream context; 0 disables it.
   * @param boolean       $convertToUtf8 Only the exact value true enables the encoding detection, the
   *                                     conversion to UTF-8 and the cleanup.
   *
   * @return string|false The read data, or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, so file names containing
    // such characters are changed; the filter is deprecated as of PHP 8.1.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function expects a bool and an int here; handing over the
    // null defaults is deprecated since PHP 8.1.
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      $encoding = self::str_detect_encoding($data);

      // str_detect_encoding() yields false when nothing was detected; that
      // value must not be used as source encoding
      if ($encoding !== false && $encoding != 'UTF-8') {
        $data = mb_convert_encoding($data, 'UTF-8', $encoding);
      }

      // clean utf-8 string
      $data = self::cleanup($data);
    }

    return $data;
  }
2183
2184
  /**
   * Optimized "mb_detect_encoding()"-function -> with UTF-16 and UTF-32 support.
   *
   * A byte order mark at the start of the string decides the encoding directly.
   * Without one, mb_detect_encoding() is asked, and strings classified as binary
   * are additionally probed with self::is_utf16() / self::is_utf32().
   *
   * @param string $str
   *
   * @return bool|string The encoding name, or false if the string-encoding can't be detected.
   */
  public static function str_detect_encoding($str)
  {
    // Byte order marks. The four byte UTF-32 marks come before the two byte
    // UTF-16 ones because the UTF-32LE mark starts with the UTF-16LE mark.
    $byteOrderMarks = array(
        "\xef\xbb\xbf"     => 'UTF-8',
        "\x00\x00\xfe\xff" => 'UTF-32BE',
        "\xff\xfe\x00\x00" => 'UTF-32LE',
        "\xfe\xff"         => 'UTF-16BE',
        "\xff\xfe"         => 'UTF-16LE',
    );

    foreach ($byteOrderMarks as $mark => $name) {
      if (strncmp($str, $mark, strlen($mark)) === 0) {
        return $name;
      }
    }

    self::checkForSupport();

    // For UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always.
    $detectOrder = array(
        'UTF-8',
        'windows-1251',
        'ISO-8859-1',
    );
    $encoding = mb_detect_encoding($str, $detectOrder, true);

    if (self::is_binary($str)) {
      // 1 stands for little endian, 2 for big endian
      $utf16 = self::is_utf16($str);
      if ($utf16 == 1) {
        return 'UTF-16LE';
      }
      if ($utf16 == 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 == 1) {
        return 'UTF-32LE';
      }
      if ($utf32 == 2) {
        return 'UTF-32BE';
      }
    }

    if (!$encoding) {
      return false;
    }

    return $encoding;
  }
2256
2257
  /**
   * Heuristically checks whether the input looks like binary data.
   *
   * The input is reported as binary when it consists solely of the
   * characters "0" and "1", or when it contains at least one NUL byte.
   *
   * @param string $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    // A string made up only of "0" / "1" digits is treated as a bit-string.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // A former third check compared substr_count($input, '^ -~') / strlen($input)
    // against 0.3. It counted the literal four-byte text "^ -~", so the ratio
    // could never exceed 0.25 and the check was always false; it was removed
    // as dead code without changing any result.
    return substr_count($input, "\x00") > 0;
  }
2281
2282
  /**
   * Guesses whether a binary-looking string is UTF-16 and, if so, its byte order.
   *
   * For each byte order the string is converted to UTF-8 and round-tripped
   * back; when the round trip is stable, a score is computed from the
   * character tables of the raw and the decoded string. The byte order with
   * the higher score wins; a tie means "not UTF-16".
   *
   * @param string $string
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE
   */
  public static function is_utf16($string)
  {
    if (!self::is_binary($string)) {
      return false;
    }

    self::checkForSupport();

    $scores = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $scores[$encoding] = 0;

      $decoded = mb_convert_encoding($string, 'UTF-8', $encoding);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      // Round trip: UTF-8 -> candidate encoding -> UTF-8 must reproduce the
      // decoded text exactly (strict comparison, so numeric-looking strings
      // such as "1e1" and "10" are not treated as equal).
      $reEncoded = mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $reDecoded = mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
      if ($reDecoded !== $decoded) {
        continue;
      }

      // NOTE(review): in_array() searches the *values* of $stringChars (the
      // occurrence counts), not its character keys - confirm this is intended.
      $stringChars = self::count_chars($string);
      foreach (self::count_chars($reDecoded) as $char => $unusedCount) {
        if (in_array($char, $stringChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    if ($scores['UTF-16LE'] > $scores['UTF-16BE']) {
      return 1;
    }

    if ($scores['UTF-16BE'] > $scores['UTF-16LE']) {
      return 2;
    }

    return false;
  }
2336
2337
  /**
   * Builds a table of the characters used in a string.
   *
   * The string is split via self::split(), the pieces are tallied and the
   * table is sorted by key.
   *
   * @param    string $str The input string
   *
   * @return   array An associative array with the characters as keys and
   *           their number of occurrences as values
   */
  public static function count_chars($str) // there is no $mode parameter
  {
    $occurrences = array_count_values(self::split($str));
    ksort($occurrences);

    return $occurrences;
  }
2353
2354
  /**
   * Guesses whether a binary-looking string is UTF-32 and, if so, its byte order.
   *
   * For each byte order the string is converted to UTF-8 and round-tripped
   * back; when the round trip is stable, a score is computed from the
   * character tables of the raw and the decoded string. The byte order with
   * the higher score wins; a tie means "not UTF-32".
   *
   * @param string $string
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE
   */
  public static function is_utf32($string)
  {
    if (!self::is_binary($string)) {
      return false;
    }

    self::checkForSupport();

    $scores = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $scores[$encoding] = 0;

      $decoded = mb_convert_encoding($string, 'UTF-8', $encoding);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      // Round trip: UTF-8 -> candidate encoding -> UTF-8 must reproduce the
      // decoded text exactly (strict comparison, so numeric-looking strings
      // such as "1e1" and "10" are not treated as equal).
      $reEncoded = mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $reDecoded = mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
      if ($reDecoded !== $decoded) {
        continue;
      }

      // NOTE(review): in_array() searches the *values* of $stringChars (the
      // occurrence counts), not its character keys - confirm this is intended.
      $stringChars = self::count_chars($string);
      foreach (self::count_chars($reDecoded) as $char => $unusedCount) {
        if (in_array($char, $stringChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    if ($scores['UTF-32LE'] > $scores['UTF-32BE']) {
      return 1;
    }

    if ($scores['UTF-32BE'] > $scores['UTF-32LE']) {
      return 2;
    }

    return false;
  }
2408
2409
  /**
   * Cleans up a UTF-8 string so that only printable characters remain.
   *
   * The input is cast to string; an empty string is returned unchanged
   * without touching any helper.
   *
   * @param string|false $text
   *
   * @return string
   */
  public static function cleanup($text)
  {
    $input = (string)$text;
    if ($input === '') {
      return '';
    }

    // init
    self::checkForSupport();

    // repair ISO <-> UTF-8 errors first
    $repaired = self::fix_simple_utf8($input);

    // Per the original author's notes, this call removes non UTF-8 symbols,
    // the diamond question mark, invisible characters (e.g. "\0") and the
    // BOM, and normalizes whitespace - TODO confirm which flag maps to
    // which effect against clean().
    $cleaned = self::clean($repaired, true, true, false, true);

    return (string)$cleaned;
  }
2439
2440
  /**
   * Checks whether a file looks binary, based on its first 512 bytes.
   *
   * A file that cannot be opened or read is treated like an empty file,
   * i.e. the result of self::is_binary('') is returned.
   *
   * @param string $file Path of the file to inspect
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      // fopen() signals failure by returning false, not by throwing, so the
      // handle has to be checked before it is handed to fread().
      $fp = fopen($file, 'rb');
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // e.g. an error handler that converts warnings into exceptions
      $block = '';
    }

    // Close the handle on every path (no "finally" so the code stays
    // usable on the old PHP versions this file still supports).
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2459
2460
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around mb_strrchr() that defaults the encoding to UTF-8.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle
   * @param string $needle   The string to find in haystack
   * @param bool   $part     [optional] If true, return all of haystack from the beginning
   *                         to the last occurrence of needle; if false, return all of
   *                         haystack from the last occurrence of needle to the end
   * @param string $encoding [optional] Character encoding name to use
   *
   * @return string|false the portion of haystack, or false if needle is not found
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
2494
2495
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around mb_strrichr() that defaults the encoding to UTF-8.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle
   * @param string $needle   The string to find in haystack
   * @param bool   $part     [optional] If true, return all of haystack from the beginning
   *                         to the last occurrence of needle; if false, return all of
   *                         haystack from the last occurrence of needle to the end
   * @param string $encoding [optional] Character encoding name to use
   *
   * @return string|false the portion of haystack, or false if needle is not found
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
2529
2530
  /**
   * Runs the native filter_var() and passes its result through self::filter().
   *
   * $option is only forwarded to filter_var() when the caller actually
   * supplied a third argument.
   *
   * @param mixed $var
   * @param int   $filter
   * @param null  $option
   *
   * @return mixed|string
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    $filtered = func_num_args() < 3
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($filtered);
  }
2549
2550
  /**
   * Normalizes strings (recursively for arrays and objects) to UTF-8.
   *
   * Arrays and objects are walked and every element / property is filtered
   * in turn. Strings get their "\r\n" and "\r" line endings replaced by "\n";
   * if they contain bytes >= 0x80 they are normalized with the given form,
   * falling back to self::encode('UTF-8', ...) when normalization yields
   * nothing. Values of any other type are returned untouched.
   *
   * @param mixed  $var
   * @param int    $normalization_form Form handed to Normalizer (4 by default)
   * @param string $leading_combining  Prefix put in front of a string that
   *                                   starts with a combining mark
   *
   * @return mixed|string
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Nothing more to do when there is no byte >= 0x80.
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker so that the isset($n[0]) test below passes.
      $n = '-';
    } else {
      $n = Normalizer::normalize($var, $normalization_form);
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars for NFC-safe concatenations.
    if ($var[0] >= "\x80" && isset($n[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
2602
2603
  /**
   * Encodes a text to UTF-8 or LATIN1.
   *
   * INFO: unlike "UTF8::utf8_encode()" this function also tries to fix broken / double encoding,
   *       so it can be called on a string that is already UTF-8 without messing it up.
   *
   * @param string $encodingLabel ISO-8859-1 || UTF-8 (normalized via normalizeEncoding())
   * @param string $text
   *
   * @return string|false the converted text, false if the normalized label is
   *                      neither "UTF-8" nor "ISO-8859-1"
   */
  public static function encode($encodingLabel, $text)
  {
    $target = self::normalizeEncoding($encodingLabel);

    switch ($target) {
      case 'UTF-8':
        return self::to_utf8($text);

      case 'ISO-8859-1':
        return self::to_latin1($text);

      default:
        return false;
    }
  }
2628
2629
  /**
   * Maps an encoding label onto one of the two supported encoding names.
   *
   * The label is upper-cased and stripped of everything except letters,
   * digits and whitespace before it is looked up; unknown labels resolve
   * to "UTF-8".
   *
   * @param string $encodingLabel e.g.: ISO, UTF8, ISO88591, WIN1252 ...
   *
   * @return string "UTF-8" or "ISO-8859-1"
   */
  protected static function normalizeEncoding($encodingLabel)
  {
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
    );

    $key = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));

    return isset($aliases[$key]) ? $aliases[$key] : 'UTF-8';
  }
2657
2658
  /**
   * Converts a text to latin1 by delegating to to_win1252().
   *
   * @param string|array $text
   *
   * @return string
   */
  public static function to_latin1($text)
  {
    $converted = self::to_win1252($text);

    return $converted;
  }
2669
2670
  /**
   * Converts a text to win1252.
   *
   * Strings are decoded via utf8_decode(), arrays are converted element by
   * element (recursively), and any other value is returned unchanged.
   *
   * @param  string|array $text
   *
   * @return string
   */
  protected static function to_win1252($text)
  {
    if (is_string($text)) {
      return self::utf8_decode($text);
    }

    if (!is_array($text)) {
      return $text;
    }

    foreach (array_keys($text) as $key) {
      $text[$key] = self::to_win1252($text[$key]);
    }

    return $text;
  }
2693
2694
  /**
   * Decodes a UTF-8 string.
   *
   * The input is first passed through to_utf8(), then the sequences listed
   * in self::$utf8ToWin1252 are replaced, and finally the result is handed
   * to Xml::utf8_decode(). An empty input yields an empty string.
   *
   * @param string $string
   *
   * @return string
   */
  public static function utf8_decode($string)
  {
    // Search / replace lists are built once and reused on later calls.
    static $search = null;
    static $replace = null;

    $string = (string)$string;
    if ($string === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $string = self::to_utf8($string);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $string);

    return Xml::utf8_decode($mapped);
  }
2724
2725
  /**
   * Runs the native filter_input() and passes its result through self::filter().
   *
   * $option is only forwarded to filter_input() when the caller actually
   * supplied a fourth argument.
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param null   $option
   *
   * @return mixed|string
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $filtered = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($filtered);
  }
2745
2746
  /**
   * Encodes a string with the native utf8_encode() and then applies the
   * replacements listed in self::$cp1252ToUtf8.
   *
   * The replacement step is skipped when the encoded string contains no
   * "\xC2" byte.
   *
   * @param string $string
   *
   * @return string
   */
  public static function utf8_encode($string)
  {
    // Search / replace lists are built once and reused on later calls.
    static $search = null;
    static $replace = null;

    $encoded = utf8_encode($string);

    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
2772
2773
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through self::filter() before it is encoded.
   * $depth is only forwarded to the native json_encode() when
   * Bootup::is_php('5.5') reports true.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   The value being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   * @param int   $options [optional] Bitmask of JSON_* encode constants, as
   *                       described on the JSON constants page.
   * @param int   $depth   [optional] Set the maximum depth. Must be greater than zero.
   *
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    if (Bootup::is_php('5.5')) {
      return json_encode($filtered, $options, $depth);
    }

    return json_encode($filtered, $options);
  }
2821
2822
  /**
   * Decodes a JSON string.
   *
   * The input is passed through self::filter() before it is decoded.
   * $options is only forwarded to the native json_decode() when
   * Bootup::is_php('5.4') returns exactly true.
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    The <i>json</i> string being decoded.
   * @param bool   $assoc   [optional] When <b>TRUE</b>, returned objects will be
   *                        converted into associative arrays.
   * @param int    $depth   [optional] User specified recursion depth.
   * @param int    $options [optional] Bitmask of JSON decode options.
   *
   * @return mixed the value encoded in <i>json</i> in appropriate PHP type.
   * <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the
   * encoded data is deeper than the recursion limit.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    if (Bootup::is_php('5.4') === true) {
      return json_decode($filtered, $assoc, $depth, $options);
    }

    return json_decode($filtered, $assoc, $depth);
  }
2870
2871
  /**
   * Runs the native filter_input_array() and passes its result through self::filter().
   *
   * $def and $add_empty are only forwarded when the caller supplied at
   * least two arguments.
   *
   * @param int   $type
   * @param mixed $def
   * @param bool  $add_empty
   *
   * @return mixed|string
   */
  public static function filter_input_array($type, $def = null, $add_empty = true)
  {
    $values = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $def, $add_empty);

    return self::filter($values);
  }
2890
2891
  /**
   * Searches a string for any of a set of characters.
   *
   * The character list is turned into a regex class via self::rxClass();
   * on a match, the rest of the string starting at the first occurrence of
   * the matched text is returned.
   *
   * @param string $s
   * @param string $charlist
   *
   * @return string|false the remainder of $s from the match on, false if nothing matched
   */
  public static function strpbrk($s, $charlist)
  {
    if (!preg_match('/' . self::rxClass($charlist) . '/us', $s, $m)) {
      return false;
    }

    $offset = strpos($s, $m[0]);

    return substr($s, $offset);
  }
2907
2908
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are case-folded via self::strtocasefold() and then compared
   * with self::strncmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
2921
2922
  /**
   * Comparison of the first n characters.
   *
   * Both strings are cut to $len characters with self::substr() and the
   * prefixes are compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncmp($str1, $str2, $len)
  {
    $prefix1 = self::substr($str1, 0, $len);
    $prefix2 = self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
2935
2936
  /**
   * String comparison.
   *
   * Identical strings short-circuit to 0; otherwise both operands are
   * normalized with Normalizer::NFD and compared with the native strcmp().
   *
   * @param string $a
   * @param string $b
   *
   * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b, and 0 if they are equal.
   */
  public static function strcmp($a, $b)
  {
    if ((string)$a === (string)$b) {
      return 0;
    }

    $left = Normalizer::normalize($a, Normalizer::NFD);
    $right = Normalizer::normalize($b, Normalizer::NFD);

    return strcmp($left, $right);
  }
2951
2952
  /**
   * Calculates the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string
   *
   * @return   int The largest entry of self::chr_size_list($str),
   *           or 0 when that list is empty
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
2969
2970
  /**
   * Generates an array of the byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string
   *
   * @return   array An array of byte lengths of each character;
   *           an empty array for an empty string
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Test for emptiness explicitly: a plain "!$str" check would also
    // reject the valid one-character string "0".
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
2990
2991
  /**
   * Converts a UTF-8 character to a HTML Numbered Entity like &#123;
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity
   *
   * @return   string HTML numbered entity, '' for an empty input
   */
  public static function single_chr_html_encode($chr)
  {
    $chr = (string)$chr;

    // Test for emptiness explicitly: a plain "!$chr" check would also
    // reject the character "0", which must become "&#48;".
    if (!isset($chr[0])) {
      return '';
    }

    return '&#' . self::ord($chr) . ';';
  }

  /**
   * Calculates the Unicode Code Point of the given UTF-8 encoded character.
   *
   * Only the first (up to) four bytes of the input are inspected.
   *
   * @param    string $s The character of which to calculate Code Point
   *
   * @return   int Unicode Code Point of the given character,
   *           0 for an empty input
   */
  public static function ord($s)
  {
    $s = (string)$s;

    // Test for emptiness explicitly: a plain "!$s" check would also
    // reject the character "0", whose code point is 48.
    if (!isset($s[0])) {
      return 0;
    }

    // unpack('C*') yields a 1-based array of the byte values.
    $s = unpack('C*', substr($s, 0, 4));
    $a = $s ? $s[1] : 0;

    // four-byte sequence
    if (0xF0 <= $a && isset($s[4])) {
      return (($a - 0xF0) << 18) + (($s[2] - 0x80) << 12) + (($s[3] - 0x80) << 6) + $s[4] - 0x80;
    }

    // three-byte sequence
    if (0xE0 <= $a && isset($s[3])) {
      return (($a - 0xE0) << 12) + (($s[2] - 0x80) << 6) + $s[3] - 0x80;
    }

    // two-byte sequence
    if (0xC0 <= $a && isset($s[2])) {
      return (($a - 0xC0) << 6) + $s[2] - 0x80;
    }

    // single byte (or a lead byte without its continuation bytes)
    return $a;
  }
3038
3039
  /**
   * Converts a UTF-8 string to a series of HTML Numbered Entities.
   *
   * INFO: the result looks like &#123;&#39;&#1740;...
   *
   * Every character returned by self::split() is encoded with
   * single_chr_html_encode() and the pieces are concatenated.
   *
   * @param    string $str The Unicode string to be encoded as numbered entities
   *
   * @return   string HTML numbered entities
   */
  public static function html_encode($str)
  {
    $encoder = array(
        '\\voku\\helper\\UTF8',
        'single_chr_html_encode',
    );

    $entities = array_map($encoder, self::split($str));

    return implode($entities);
  }
3060
3061
  /**
   * Checks if a file starts with the BOM character.
   *
   * Only the first three bytes of the file are read.
   *
   * @param    string $file_path Path to a valid file
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   */
  public static function file_has_bom($file_path)
  {
    // Read from offset 0: since PHP 7.1 a negative offset counts from the
    // end of the stream, so the former "-1" read the last byte of the file
    // instead of its first three bytes.
    $head = file_get_contents($file_path, false, null, 0, 3);

    return self::is_bom($head);
  }
3072
3073
  /**
   * Checks if the given string is exactly the "UTF8 - Byte Order Mark".
   *
   * WARNING: use "UTF8::string_has_bom()" to check for a BOM at the start of a longer string
   *
   * @param    string $utf8_chr The input string
   *
   * @return   bool True if the $utf8_chr is Byte Order Mark, False otherwise
   */
  public static function is_bom($utf8_chr)
  {
    $bom = self::bom();

    return $bom === $utf8_chr;
  }
3086
3087
  /**
   * Returns the Byte Order Mark character.
   *
   * @return   string Byte Order Mark (the three bytes EF BB BF)
   */
  public static function bom()
  {
    $bom = "\xEF\xBB\xBF";

    return $bom;
  }
3096
3097
  /**
   * Alias for "UTF8::is_bom()".
   *
   * @param string $utf8_chr The input string
   *
   * @return bool Whatever "UTF8::is_bom()" returns for $utf8_chr
   */
  public static function isBom($utf8_chr)
  {
    $result = self::is_bom($utf8_chr);

    return $result;
  }
3108
3109
  /**
   * Checks if a string starts with the "UTF-8 BOM" character.
   *
   * @param    string $str The input string
   *
   * @return   bool True if the string has BOM at the start, False otherwise
   */
  public static function string_has_bom($str)
  {
    // the BOM is three bytes long, so only the first three bytes matter
    $firstBytes = substr($str, 0, 3);

    return self::is_bom($firstBytes);
  }
3120
3121
  /**
   * Prepends the BOM character to the string and returns the whole string.
   *
   * INFO: If the string already starts with a BOM, the input is returned unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that starts with the BOM
   */
  public static function add_bom_to_string($str)
  {
    // nothing to do when the first three bytes already are the BOM
    if (self::string_has_bom($str)) {
      return $str;
    }

    return self::bom() . $str;
  }
3138
3139
  /**
   * Shuffles all the characters in the string.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string
   */
  public static function str_shuffle($str)
  {
    // work on whole characters (as produced by split()), not on bytes
    $characters = self::split($str);

    shuffle($characters);

    return implode('', $characters);
  }
3154
3155
  /**
   * Wraps a string to a given number of characters.
   *
   * The native wordwrap() counts bytes, so it is not applied to the text itself.
   * Instead an ASCII "mask" with one byte per character is built, the native
   * function wraps that mask, and the break positions are replayed onto the
   * real characters.
   *
   * @param string $string The input string
   * @param int    $width  The number of characters at which the string will be wrapped
   * @param string $break  The line delimiter
   * @param bool   $cut    Passed through to the native wordwrap() for the mask
   *
   * @return false|string Returns the given string wrapped at the specified length,
   *                      or false if the native wordwrap() rejects the arguments.
   */
  public static function wordwrap($string, $width = 75, $break = "\n", $cut = false)
  {
    // let the native function validate $width / $break / $cut first
    if (false === wordwrap('-', $width, $break, $cut)) {
      return false;
    }

    // normalise the delimiter once, so that the strict comparison against
    // the collected characters below always compares strings
    $break = (string)$break;

    $lines = explode($break, $string);
    $lineCount = count($lines);

    if (1 === $lineCount && '' === $lines[0]) {
      return '';
    }

    // $chars holds the real characters (existing breaks are one element each),
    // $mask holds one ASCII byte per element:
    //   '#' = existing break, ' ' = space, '?' = any other character
    $chars = array();
    $mask = '';

    for ($i = 0; $i < $lineCount; ++$i) {

      if ($i) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($lines[$i]) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $wrapped = '';
    $j = 0;
    $b = $i = -1;

    // wrap the mask; every '#' in the result marks a line break
    $mask = wordwrap($mask, $width, '#', $cut);

    while (false !== $b = self::strpos($mask, '#', $b + 1)) {
      // copy the characters that belong to the current line
      for (++$i; $i < $b; ++$i) {
        $wrapped .= $chars[$j];
        unset($chars[$j++]);
      }

      // a break that replaced a space or an existing delimiter consumes that
      // character; a break inserted by $cut consumes nothing
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
        unset($chars[$j++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
3220
3221
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack  The string being checked.
   * @param string  $needle    The string to find in haystack. It is always cast to a
   *                           string, so a number is searched for as its decimal text.
   * @param int     $offset    [optional] The search offset. If it is not specified, 0 is used.
   * @param string  $encoding  [optional] Character encoding; a boolean (old signature)
   *                           is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from haystack and needle before searching
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string. If needle is not found (or haystack / needle
   *                   is empty), it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: this is only a fallback for old versions
    // (applied up front so that every backend below sees a usable encoding)
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    if (self::$support['mbstring'] === true) {
      return mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['iconv'] === true) {
      // use the iconv function that matches the checked capability
      return iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // pure PHP fallback: cut off the skipped characters, search byte-wise and
    // translate the byte position back into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
3297
3298
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * @param    int|string $code_point The code point as integer; any other value is
   *                                  handed to "UTF8::hex_to_int()" first
   *
   * @return   string Multi-Byte character
   *           returns empty string on failure to encode
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $i = (int)$code_point;

    if ($i !== $code_point) {
      // $code_point is not an integer, lets extract the int code point from it
      $i = (int)self::hex_to_int($code_point);

      if (!$i) {
        return '';
      }
    }

    // decode the numeric entity for that code point
    return self::html_entity_decode('&#' . $i . ';', ENT_QUOTES);
  }
3319
3320
  /**
   * Converts a hexadecimal code point representation to an integer.
   *
   * Accepted forms: 4 to 6 hexadecimal digits, optionally prefixed with
   * "U+" or "\u" (case-insensitive), e.g. "U+00a7", "\u0041" or "1F600".
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * @param    string $str The hexadecimal code point representation
   *
   * @return   int The code point, or 0 on failure
   */
  public static function hex_to_int($str)
  {
    // only real hex digits are allowed; anything else (e.g. "12gz") is a
    // failure instead of being partially parsed by intval()
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
3337
3338
  /**
   * Reverses the order of the characters in the string.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    // reverse whole characters (as produced by split()), not single bytes
    $characters = self::split($str);
    $reversed = array_reverse($characters);

    return implode($reversed);
  }
3349
3350
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *
   * @return   string The character with the highest code point,
   *                  or an empty string if there is no character at all
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codePoints = self::codepoints($arg);

    // the native max() must not be called with an empty array
    // (warning before PHP 8, ValueError since PHP 8)
    if (!$codePoints) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3365
3366
  /**
   * Accepts a string and returns an array of Unicode code points.
   *
   * @since 1.0
   *
   * @param    mixed $arg     A UTF-8 encoded string (it is split into characters first)
   *                          or an array of such characters
   * @param    bool  $u_style If True, will return Code Points in U+xxxx format,
   *                          default, Code Points will be returned as integers
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $characters = is_string($arg) ? self::split($arg) : $arg;

    // every character -> result of UTF8::ord()
    $codePoints = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'ord',
        ),
        $characters
    );

    if (!$u_style) {
      return $codePoints;
    }

    // every code point -> result of UTF8::int_to_hex() with its default prefix
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'int_to_hex',
        ),
        $codePoints
    );
  }
3403
3404
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *
   * @return   string The character with the lowest code point,
   *                  or an empty string if there is no character at all
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codePoints = self::codepoints($arg);

    // the native min() must not be called with an empty array
    // (warning before PHP 8, ValueError since PHP 8)
    if (!$codePoints) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3419
3420
  /**
   * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character
   * @param    string $pfix The prefix put in front of the hexadecimal digits
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $codePoint = self::ord($chr);

    return self::int_to_hex($codePoint, $pfix);
  }
3432
3433
  /**
   * Converts an integer to the hexadecimal U+xxxx code point representation.
   *
   * @param    int    $int  The integer to be converted to hexadecimal code point
   * @param    string $pfix The prefix put in front of the hexadecimal digits
   *
   * @return   string The code point, or empty string on failure
   *                  (when $int does not consist of decimal digits only)
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    if (!ctype_digit((string)$int)) {
      return '';
    }

    $digits = dechex((int)$int);

    // left-pad with zeros to at least four digits
    if (strlen($digits) < 4) {
      $digits = substr('0000' . $digits, -4);
    }

    return $pfix . $digits;
  }
3453
3454
  /**
   * Get a binary representation of a string.
   *
   * Every byte of the input is passed to "UTF8::ord()" and the result is
   * formatted with "%08b"; the pieces are concatenated.
   *
   * @param   string $string The input string.
   *
   * @return  string The concatenated binary digits, or an empty string for empty input
   */
  public static function str_to_binary($string)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    $bits = '';
    $byteCount = strlen($string);
    $index = 0;

    while ($index < $byteCount) {
      $bits .= vsprintf('%08b', (array)self::ord($string[$index]));
      ++$index;
    }

    return $bits;
  }
3479
3480
  /**
   * Counts (or lists) the words in a UTF-8 string.
   *
   * @param string $s        The input string
   * @param int    $format   0 (default): number of words,
   *                         1: list of the words,
   *                         2: map of "character offset of the word" => word
   * @param string $charlist Additional characters handed to rxClass() together with "\pL"
   *
   * @return array|float|string The number of words, or the words themselves (see $format)
   */
  public static function str_word_count($s, $format = 0, $charlist = '')
  {
    $wordClass = self::rxClass($charlist, '\pL');

    // odd indexes hold the words, even indexes the text between them
    $parts = preg_split("/({$wordClass}+(?:[\p{Pd}’']{$wordClass}+)*)/u", $s, -1, PREG_SPLIT_DELIM_CAPTURE);
    $len = count($parts);

    if (1 == $format) {
      $words = array();

      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if (2 == $format) {
      self::checkForSupport();

      $words = array();
      $offset = self::strlen($parts[0]);

      for ($i = 1; $i < $len; $i += 2) {
        $words[$offset] = $parts[$i];
        // advance by the word itself plus the separator that follows it
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($len - 1) / 2;
  }
3514
3515
  /**
   * Strip whitespace or other characters from beginning and end of a UTF-8 string.
   *
   * INFO: this is slower than "trim()"
   *
   * But we can only use the original function if the string / chars are pure
   * 7-bit ASCII, and checking for that costs more time than we could save here.
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped; without a list
   *                          (INF or an empty value) the characters matched by
   *                          [\pZ\pC] are stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    // "0" is falsy in PHP but is a perfectly valid list of characters to
    // strip, so it must not be mistaken for "no list given"
    $noCharList = ($chars === INF) || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($noCharList) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $string);
    }

    return self::rtrim(self::ltrim($string, $chars), $chars);
  }
3543
3544
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * WARNING: this is much slower than "rtrim()" !!!!
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped (default: "\s")
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    $chars = INF === $chars ? '\s' : self::rxClass($chars);

    // "D" (dollar end only): without it "$" also matches right before a final
    // newline, so characters in front of a trailing "\n" would be stripped
    // although they are not at the end of the string
    return preg_replace("/{$chars}+$/uD", '', $string);
  }
3566
3567
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * WARNING: this is much slower than "ltrim()" !!!!
   *
   * @param    string $string The string to be trimmed
   * @param    string $chars  Optional characters to be stripped (default: "\s")
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($string = '', $chars = INF)
  {
    $string = (string)$string;

    if (!isset($string[0])) {
      return '';
    }

    // pattern fragment describing what is stripped
    if (INF === $chars) {
      $class = '\s';
    } else {
      $class = self::rxClass($chars);
    }

    return preg_replace("/^{$class}+/u", '', $string);
  }
3589
3590
  /**
   * Replace text within a portion of a string (character based).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * For an array of strings every element is processed on its own; scalar
   * $replacement / $start / $length values are applied to every element.
   * NOTE: for an array of strings a missing $length is filled with 0 (the
   * replacement is inserted), whereas for a single string a missing $length
   * means "up to the end of the string".
   *
   * @param string|array $string      The input string, or an array of strings
   * @param string|array $replacement The replacement; for a single $string only the
   *                                  first element of an array is used
   * @param int|array    $start       Character offset where the replacement begins
   * @param null|int|array $length    Number of characters to replace
   *
   * @return array|string The result string, or an array of results for an array input
   */
  public static function substr_replace($string, $replacement, $start, $length = null)
  {
    if (is_array($string)) {
      $num = count($string);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as $key => $value) {
          $start[$key] = is_int($value) ? $value : 0;
        }
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as $key => $value) {
          if (isset($value)) {
            $length[$key] = (is_int($value) ? $value : $num);
          } else {
            $length[$key] = 0;
          }
        }
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $string, $replacement, $start, $length);
    }

    // a single string can only take a single replacement
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both strings into arrays of whole characters
    preg_match_all('/./us', (string)$string, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // "up to the end": the character count is already known from the
      // split above, no need to depend on mbstring and its internal encoding
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // glue first, pieces second: the reversed order is deprecated since PHP 7.4
    return join('', $smatches[0]);
  }
3666
3667
  /**
   * Alias for "UTF8::to_latin1()".
   *
   * @param string $text The input handed over to "UTF8::to_latin1()"
   *
   * @return string Whatever "UTF8::to_latin1()" returns for $text
   */
  public static function toLatin1($text)
  {
    $converted = self::to_latin1($text);

    return $converted;
  }
3678
3679
  /**
   * Count the number of sub string occurrences.
   *
   * $offset and $length are applied exactly once, by cutting the haystack with
   * "UTF8::substr()"; the occurrences are then counted in that slice.
   *
   * @param    string   $haystack The string to search in
   * @param    string   $needle   The string to search for
   * @param    int      $offset   The offset where to start counting
   * @param    int|null $length   The maximum length after the specified offset to search for the substring;
   *                              null means "up to the end of the haystack"
   *
   * @return   int number of occurrences of $needle
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $offset = (int)$offset;

    if ($length !== null) {
      $haystack = self::substr($haystack, $offset, (int)$length);
    } elseif ($offset) {
      // offset only: keep everything from the offset to the end
      $haystack = self::substr($haystack, $offset);
    }

    // the window was already applied above; passing offset / length to the
    // native function again would shift (and byte-count) it a second time
    return substr_count((string)$haystack, $needle);
  }
3705
3706
  /**
   * Alias for "UTF8::is_ascii()".
   *
   * @param string $str The string to check
   *
   * @return bool Whatever "UTF8::is_ascii()" returns for $str
   */
  public static function isAscii($str)
  {
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
3717
3718
  /**
   * Checks if a string is 7 bit ASCII.
   *
   * @param    string $str The string to check
   *
   * @return   bool True if no byte in the range 0x80-0xFF was found, False otherwise
   */
  public static function is_ascii($str)
  {
    // look for any byte with the high bit set
    $highBitFound = preg_match('/[\x80-\xFF]/', $str);

    return !$highBitFound;
  }
3729
3730
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point in this order:
   * decimal digits -> integer value; hexadecimal digits that
   * "UTF8::hex_to_int()" accepts -> that value; anything else -> "UTF8::ord()".
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at
   *
   * @return   array Array of UTF-8 characters; empty if a boundary is empty
   *                 or cannot be resolved to a non-zero code point
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $bounds = array();

    foreach (array($var1, $var2) as $var) {
      if (ctype_digit((string)$var)) {
        $codePoint = (int)$var;
      } else {
        $codePoint = ctype_xdigit($var) ? (int)self::hex_to_int($var) : 0;

        // Not a usable hex code point (e.g. the single characters "a".."f",
        // which consist of hex digits but are rejected by hex_to_int()):
        // treat the value as a character.
        if (!$codePoint) {
          $codePoint = self::ord($var);
        }
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3776
3777
  /**
   * Creates a random string of characters.
   *
   * The character pool is built once and cached in static variables.
   * NOTE(review): the characters are picked with mt_rand(), so the result is
   * not suitable for anything security related.
   *
   * @param    int $len The length of string in characters
   *
   * @return   string String consisting of random characters
   *                  (empty string if $len is not a positive integer)
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // a non-integer length would never hit exactly 0 in the loop below
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (!$chars) {
      if (self::$support['pcre_utf8'] === true) {
        $candidates = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // blank out everything that is neither a number nor a letter
        $candidates = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $candidates);

        // keep all non-empty entries; a plain array_filter() would also
        // throw away the character "0" because it is falsy
        $chars = array();
        foreach ($candidates as $candidate) {
          if ($candidate !== '') {
            $chars[] = $candidate;
          }
        }
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for (; $len > 0; --$len) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
3824
3825
  /**
   * Alias for "UTF8::chr_map()".
   *
   * @param callable $callback The callback function
   * @param string   $str      UTF-8 string to run callback on
   *
   * @return array Whatever "UTF8::chr_map()" returns
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
3839
3840
  /**
   * Applies a callback to all characters of a string.
   *
   * @param    callable $callback The callback function
   * @param    string   $str      UTF-8 string to run callback on
   *
   * @return   array The outcome of callback, one entry per character
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
3855
3856
  /**
   * Returns a single UTF-8 character from a string.
   *
   * @param    string $string UTF-8 string
   * @param    int    $pos    The position of character to return.
   *
   * @return   string Single Multi-Byte character
   */
  public static function access($string, $pos)
  {
    // $str[1] like functionality: a substring of length one at the given position
    $character = self::substr($string, $pos, 1);

    return $character;
  }
3870
3871
  /**
   * Sorts all characters of a string according to their code points.
   *
   * @param    string $str    UTF-8 string
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicate values
      $flipped = array_flip($codePoints);
      $codePoints = array_flip($flipped);
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
3896
3897
  /**
   * Makes a UTF-8 string from code points.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    // every code point -> result of UTF8::chr()
    $toCharacter = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    $characters = array_map($toCharacter, $array);

    return implode($characters);
  }
3916
3917
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The input is passed through "UTF8::clean()" before the native
   * strip_tags() is applied.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            The input string.
   * @param string $allowable_tags [optional] You can use the optional second parameter
   *                               to specify tags which should not be stripped.
   *                               HTML comments and PHP tags are also stripped. This is
   *                               hardcoded and can not be changed with allowable_tags.
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
3943
3944
  /**
   * Pad a UTF-8 string to a given length with another string.
   *
   * Lengths are measured with self::strlen() and the padding is cut with
   * self::substr(), so a multi-byte pad string is handled through those helpers
   * rather than by raw byte offsets.
   *
   * The input is returned unchanged when $pad_length is not an integer, is not
   * positive, is smaller than the current length, or when $pad_string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *                              (any other value is treated like STR_PAD_RIGHT)
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    // Padding only happens for a positive integer target that is not shorter than the input.
    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $input_length) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // An empty pad string cannot fill anything and would cause a division by zero below.
    if ($ps_length < 1) {
      return $input;
    }

    // Number of characters that still have to be added.
    $diff = $pad_length - $input_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        // Repeat the pad string often enough to cover $diff, then trim the surplus.
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // The left side gets the rounded-down half, the right side the rounded-up half.
        // The halves are computed as integers before use; the previous "(int)$diff / 2"
        // cast first and divided afterwards, producing a float length for odd $diff.
        $left_length = (int)floor($diff / 2);
        $right_length = (int)ceil($diff / 2);
        $half_repeats = (int)ceil($diff / $ps_length / 2);

        $pre = str_repeat($pad_string, $half_repeats);
        $pre = self::substr($pre, 0, $left_length);
        $post = str_repeat($pad_string, $half_repeats);
        $post = self::substr($post, 0, $right_length);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $input . $post;
  }
3989
3990
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is handed to PHP's
   * native str_repeat().
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           The multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string The repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return str_repeat(self::filter($input), $multiplier);
  }
4014
4015
  /**
   * Removes duplicate (directly consecutive) occurrences of a string in another string.
   *
   * Every run of one or more consecutive repetitions of a search string is
   * collapsed into a single occurrence of it. When $what is neither a string
   * nor an array, $str is returned untouched.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String (or list of strings) to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        // A callback is used instead of a plain replacement string so that a
        // needle containing "$0", "\1", "${2}" etc. is inserted literally and
        // not interpreted as a back-reference by the regex engine.
        $str = preg_replace_callback(
            '/(?:' . preg_quote($item, '/') . ')+/',
            function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
4037
4038
  /**
   * Finds position of first occurrence of a string within another, case insensitive
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching; null is treated as 0
   *                           </p>
   * @param string  $encoding  Character encoding passed to mb_stripos(); a boolean
   *                           value is replaced by 'UTF-8'
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the first occurrence of
   * needle in the haystack string, or false if needle is not found or if
   * either haystack or needle is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or needle can never produce a match.
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    // NOTE(review): presumably older callers passed a boolean in this position - confirm.
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    // Cast the offset: the default is null, and passing null to the integer
    // parameter of an internal function is deprecated as of PHP 8.1.
    return mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4085
4086
  /**
   * Fix a double (or multiple) encoded UTF-8 string.
   *
   * An array is processed element by element (recursively) and returned as an
   * array with the same keys. For any other value, self::utf8_decode() followed
   * by self::to_utf8() is applied repeatedly until the value stops changing.
   *
   * @param array|string $text
   *
   * @return array|string The fixed value; an array when an array was given.
   */
  public static function fix_utf8($text)
  {
    if (is_array($text)) {
      $fixed = array();

      foreach ($text as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Keep decoding one layer at a time until a round no longer changes the value.
    $previous = '';
    while ($previous != $text) {
      $previous = $text;
      $text = self::to_utf8(self::utf8_decode($text));
    }

    return $text;
  }
4113
4114
  /**
   * Alias for "UTF8::ucfirst": delegates directly to it.
   *
   * @param string $str The input string
   *
   * @return string The value returned by self::ucfirst()
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
4125
4126
  /**
   * Makes a string's first character uppercase.
   *
   * The first character is taken with self::substr(), upper-cased with
   * self::strtoupper() and re-joined with the rest of the string.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $head = self::strtoupper(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
4137
4138
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when self::$support reports mbstring as available;
   * otherwise the string is cleaned and converted via the lower => upper
   * table returned by self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding Encoding handed to mb_strtoupper(); not used by the fallback.
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Nothing to convert for an empty string.
    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return mb_strtoupper($str, $encoding);
    }

    // fallback: the lookup lists are built once and kept for later calls
    static $lowerChars = null;
    static $upperChars = null;

    if ($lowerChars === null) {
      $table = self::case_table();
      $lowerChars = array_keys($table);
      $upperChars = array_values($table);
    }

    return str_replace($lowerChars, $upperChars, self::clean($str));
  }
4181
4182
  /**
4183
   * returns an array of all lower and upper case UTF-8 encoded characters
4184
   *
4185
   * @return   array An array with lower case chars as keys and upper case chars as values
4186
   */
4187
  protected static function case_table()
4188
  {
4189
    static $case = array(
4190
4191
      //lower => upper
4192
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
4193
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
4194
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
4195
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
4196
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
4197
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
4198
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
4199
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
4200
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
4201
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
4202
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
4203
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
4204
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
4205
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
4206
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
4207
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
4208
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
4209
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
4210
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
4211
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
4212
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
4213
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
4214
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
4215
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
4216
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
4217
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
4218
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
4219
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
4220
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
4221
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
4222
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
4223
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
4224
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
4225
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
4226
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
4227
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
4228
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
4229
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
4230
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
4231
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
4232
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
4233
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
4234
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
4235
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
4236
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
4237
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
4238
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
4239
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
4240
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
4241
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
4242
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
4243
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
4244
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
4245
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
4246
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
4247
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
4248
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
4249
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
4250
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
4251
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
4252
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
4253
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
4254
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
4255
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
4256
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
4257
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
4258
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
4259
      "\xea\x9e\x87"     => "\xea\x9e\x86",
4260
      "\xea\x9e\x85"     => "\xea\x9e\x84",
4261
      "\xea\x9e\x83"     => "\xea\x9e\x82",
4262
      "\xea\x9e\x81"     => "\xea\x9e\x80",
4263
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
4264
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
4265
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
4266
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
4267
      "\xea\x9d\xad"     => "\xea\x9d\xac",
4268
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
4269
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
4270
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
4271
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
4272
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
4273
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
4274
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
4275
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
4276
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
4277
      "\xea\x9d\x99"     => "\xea\x9d\x98",
4278
      "\xea\x9d\x97"     => "\xea\x9d\x96",
4279
      "\xea\x9d\x95"     => "\xea\x9d\x94",
4280
      "\xea\x9d\x93"     => "\xea\x9d\x92",
4281
      "\xea\x9d\x91"     => "\xea\x9d\x90",
4282
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
4283
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
4284
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
4285
      "\xea\x9d\x89"     => "\xea\x9d\x88",
4286
      "\xea\x9d\x87"     => "\xea\x9d\x86",
4287
      "\xea\x9d\x85"     => "\xea\x9d\x84",
4288
      "\xea\x9d\x83"     => "\xea\x9d\x82",
4289
      "\xea\x9d\x81"     => "\xea\x9d\x80",
4290
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
4291
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
4292
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
4293
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
4294
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
4295
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
4296
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
4297
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
4298
      "\xea\x9c\xad"     => "\xea\x9c\xac",
4299
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
4300
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
4301
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
4302
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
4303
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
4304
      "\xea\x9a\x97"     => "\xea\x9a\x96",
4305
      "\xea\x9a\x95"     => "\xea\x9a\x94",
4306
      "\xea\x9a\x93"     => "\xea\x9a\x92",
4307
      "\xea\x9a\x91"     => "\xea\x9a\x90",
4308
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
4309
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
4310
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
4311
      "\xea\x9a\x89"     => "\xea\x9a\x88",
4312
      "\xea\x9a\x87"     => "\xea\x9a\x86",
4313
      "\xea\x9a\x85"     => "\xea\x9a\x84",
4314
      "\xea\x9a\x83"     => "\xea\x9a\x82",
4315
      "\xea\x9a\x81"     => "\xea\x9a\x80",
4316
      "\xea\x99\xad"     => "\xea\x99\xac",
4317
      "\xea\x99\xab"     => "\xea\x99\xaa",
4318
      "\xea\x99\xa9"     => "\xea\x99\xa8",
4319
      "\xea\x99\xa7"     => "\xea\x99\xa6",
4320
      "\xea\x99\xa5"     => "\xea\x99\xa4",
4321
      "\xea\x99\xa3"     => "\xea\x99\xa2",
4322
      "\xea\x99\x9f"     => "\xea\x99\x9e",
4323
      "\xea\x99\x9d"     => "\xea\x99\x9c",
4324
      "\xea\x99\x9b"     => "\xea\x99\x9a",
4325
      "\xea\x99\x99"     => "\xea\x99\x98",
4326
      "\xea\x99\x97"     => "\xea\x99\x96",
4327
      "\xea\x99\x95"     => "\xea\x99\x94",
4328
      "\xea\x99\x93"     => "\xea\x99\x92",
4329
      "\xea\x99\x91"     => "\xea\x99\x90",
4330
      "\xea\x99\x8f"     => "\xea\x99\x8e",
4331
      "\xea\x99\x8d"     => "\xea\x99\x8c",
4332
      "\xea\x99\x8b"     => "\xea\x99\x8a",
4333
      "\xea\x99\x89"     => "\xea\x99\x88",
4334
      "\xea\x99\x87"     => "\xea\x99\x86",
4335
      "\xea\x99\x85"     => "\xea\x99\x84",
4336
      "\xea\x99\x83"     => "\xea\x99\x82",
4337
      "\xea\x99\x81"     => "\xea\x99\x80",
4338
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
4339
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
4340
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
4341
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
4342
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
4343
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
4344
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
4345
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
4346
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
4347
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
4348
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
4349
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
4350
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
4351
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
4352
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
4353
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
4354
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
4355
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
4356
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
4357
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
4358
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
4359
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
4360
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
4361
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
4362
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
4363
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
4364
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
4365
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
4366
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
4367
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
4368
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
4369
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
4370
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
4371
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
4372
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
4373
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
4374
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
4375
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
4376
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
4377
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
4378
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
4379
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
4380
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
4381
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
4382
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
4383
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
4384
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
4385
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
4386
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
4387
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
4388
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
4389
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
4390
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
4391
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
4392
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
4393
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
4394
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
4395
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
4396
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
4397
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
4398
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
4399
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
4400
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
4401
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
4402
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
4403
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
4404
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
4405
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
4406
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
4407
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
4408
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
4409
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
4410
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
4411
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
4412
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
4413
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
4414
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
4415
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
4416
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
4417
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
4418
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
4419
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
4420
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
4421
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
4422
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
4423
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
4424
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
4425
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
4426
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
4427
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
4428
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
4429
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
4430
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
4431
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
4432
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
4433
      "\xe2\xb1\xa6"     => "\xc8\xbe",
4434
      "\xe2\xb1\xa5"     => "\xc8\xba",
4435
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
4436
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
4437
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
4438
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
4439
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
4440
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
4441
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
4442
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
4443
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
4444
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
4445
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
4446
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
4447
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
4448
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
4449
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
4450
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
4451
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
4452
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
4453
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
4454
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
4455
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
4456
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
4457
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
4458
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
4459
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
4460
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
4461
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
4462
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
4463
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
4464
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
4465
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
4466
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
4467
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
4468
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
4469
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
4470
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
4471
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
4472
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
4473
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
4474
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
4475
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
4476
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
4477
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
4478
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
4479
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
4480
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
4481
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
4482
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
4483
      "\xe2\x86\x84"     => "\xe2\x86\x83",
4484
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
4485
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
4486
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
4487
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
4488
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
4489
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
4490
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
4491
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
4492
      "\xe1\xbe\xbe"     => "\xce\x99",
4493
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
4494
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
4495
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
4496
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
4497
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
4498
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
4499
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
4500
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
4501
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
4502
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
4503
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
4504
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
4505
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
4506
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
4507
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
4508
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
4509
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
4510
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
4511
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
4512
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
4513
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
4514
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
4515
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
4516
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
4517
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
4518
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
4519
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
4520
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
4521
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
4522
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
4523
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
4524
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
4525
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
4526
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
4527
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
4528
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
4529
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
4530
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
4531
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
4532
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
4533
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
4534
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
4535
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
4536
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
4537
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
4538
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
4539
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
4540
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
4541
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
4542
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
4543
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
4544
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
4545
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
4546
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
4547
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
4548
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
4549
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
4550
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
4551
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
4552
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
4553
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
4554
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
4555
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
4556
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
4557
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
4558
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
4559
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
4560
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
4561
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
4562
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
4563
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
4564
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
4565
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
4566
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
4567
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
4568
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
4569
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
4570
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
4571
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
4572
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
4573
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
4574
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
4575
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
4576
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
4577
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
4578
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
4579
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
4580
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
4581
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
4582
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
4583
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
4584
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
4585
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
4586
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
4587
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
4588
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
4589
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
4590
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
4591
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
4592
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
4593
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
4594
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
4595
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
4596
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
4597
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
4598
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
4599
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
4600
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
4601
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
4602
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
4603
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
4604
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
4605
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
4606
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
4607
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
4608
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
4609
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
4610
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
4611
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
4612
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
4613
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
4614
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
4615
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
4616
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
4617
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
4618
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
4619
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
4620
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
4621
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
4622
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
4623
      "\xe1\xba\xad"     => "\xe1\xba\xac",
4624
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
4625
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
4626
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
4627
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
4628
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
4629
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
4630
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
4631
      "\xe1\xba\x95"     => "\xe1\xba\x94",
4632
      "\xe1\xba\x93"     => "\xe1\xba\x92",
4633
      "\xe1\xba\x91"     => "\xe1\xba\x90",
4634
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
4635
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
4636
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
4637
      "\xe1\xba\x89"     => "\xe1\xba\x88",
4638
      "\xe1\xba\x87"     => "\xe1\xba\x86",
4639
      "\xe1\xba\x85"     => "\xe1\xba\x84",
4640
      "\xe1\xba\x83"     => "\xe1\xba\x82",
4641
      "\xe1\xba\x81"     => "\xe1\xba\x80",
4642
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
4643
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
4644
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
4645
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
4646
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
4647
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
4648
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
4649
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
4650
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
4651
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
4652
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
4653
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
4654
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
4655
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
4656
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
4657
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
4658
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
4659
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
4660
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
4661
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
4662
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
4663
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
4664
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
4665
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
4666
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
4667
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
4668
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
4669
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
4670
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
4671
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
4672
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
4673
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
4674
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
4675
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
4676
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
4677
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
4678
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
4679
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
4680
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
4681
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
4682
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
4683
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
4684
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
4685
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
4686
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
4687
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
4688
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
4689
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
4690
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
4691
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
4692
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
4693
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
4694
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
4695
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
4696
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
4697
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
4698
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
4699
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
4700
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
4701
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
4702
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
4703
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
4704
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
4705
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
4706
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
4707
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
4708
      "\xd6\x86"         => "\xd5\x96",
4709
      "\xd6\x85"         => "\xd5\x95",
4710
      "\xd6\x84"         => "\xd5\x94",
4711
      "\xd6\x83"         => "\xd5\x93",
4712
      "\xd6\x82"         => "\xd5\x92",
4713
      "\xd6\x81"         => "\xd5\x91",
4714
      "\xd6\x80"         => "\xd5\x90",
4715
      "\xd5\xbf"         => "\xd5\x8f",
4716
      "\xd5\xbe"         => "\xd5\x8e",
4717
      "\xd5\xbd"         => "\xd5\x8d",
4718
      "\xd5\xbc"         => "\xd5\x8c",
4719
      "\xd5\xbb"         => "\xd5\x8b",
4720
      "\xd5\xba"         => "\xd5\x8a",
4721
      "\xd5\xb9"         => "\xd5\x89",
4722
      "\xd5\xb8"         => "\xd5\x88",
4723
      "\xd5\xb7"         => "\xd5\x87",
4724
      "\xd5\xb6"         => "\xd5\x86",
4725
      "\xd5\xb5"         => "\xd5\x85",
4726
      "\xd5\xb4"         => "\xd5\x84",
4727
      "\xd5\xb3"         => "\xd5\x83",
4728
      "\xd5\xb2"         => "\xd5\x82",
4729
      "\xd5\xb1"         => "\xd5\x81",
4730
      "\xd5\xb0"         => "\xd5\x80",
4731
      "\xd5\xaf"         => "\xd4\xbf",
4732
      "\xd5\xae"         => "\xd4\xbe",
4733
      "\xd5\xad"         => "\xd4\xbd",
4734
      "\xd5\xac"         => "\xd4\xbc",
4735
      "\xd5\xab"         => "\xd4\xbb",
4736
      "\xd5\xaa"         => "\xd4\xba",
4737
      "\xd5\xa9"         => "\xd4\xb9",
4738
      "\xd5\xa8"         => "\xd4\xb8",
4739
      "\xd5\xa7"         => "\xd4\xb7",
4740
      "\xd5\xa6"         => "\xd4\xb6",
4741
      "\xd5\xa5"         => "\xd4\xb5",
4742
      "\xd5\xa4"         => "\xd4\xb4",
4743
      "\xd5\xa3"         => "\xd4\xb3",
4744
      "\xd5\xa2"         => "\xd4\xb2",
4745
      "\xd5\xa1"         => "\xd4\xb1",
4746
      "\xd4\xa5"         => "\xd4\xa4",
4747
      "\xd4\xa3"         => "\xd4\xa2",
4748
      "\xd4\xa1"         => "\xd4\xa0",
4749
      "\xd4\x9f"         => "\xd4\x9e",
4750
      "\xd4\x9d"         => "\xd4\x9c",
4751
      "\xd4\x9b"         => "\xd4\x9a",
4752
      "\xd4\x99"         => "\xd4\x98",
4753
      "\xd4\x97"         => "\xd4\x96",
4754
      "\xd4\x95"         => "\xd4\x94",
4755
      "\xd4\x93"         => "\xd4\x92",
4756
      "\xd4\x91"         => "\xd4\x90",
4757
      "\xd4\x8f"         => "\xd4\x8e",
4758
      "\xd4\x8d"         => "\xd4\x8c",
4759
      "\xd4\x8b"         => "\xd4\x8a",
4760
      "\xd4\x89"         => "\xd4\x88",
4761
      "\xd4\x87"         => "\xd4\x86",
4762
      "\xd4\x85"         => "\xd4\x84",
4763
      "\xd4\x83"         => "\xd4\x82",
4764
      "\xd4\x81"         => "\xd4\x80",
4765
      "\xd3\xbf"         => "\xd3\xbe",
4766
      "\xd3\xbd"         => "\xd3\xbc",
4767
      "\xd3\xbb"         => "\xd3\xba",
4768
      "\xd3\xb9"         => "\xd3\xb8",
4769
      "\xd3\xb7"         => "\xd3\xb6",
4770
      "\xd3\xb5"         => "\xd3\xb4",
4771
      "\xd3\xb3"         => "\xd3\xb2",
4772
      "\xd3\xb1"         => "\xd3\xb0",
4773
      "\xd3\xaf"         => "\xd3\xae",
4774
      "\xd3\xad"         => "\xd3\xac",
4775
      "\xd3\xab"         => "\xd3\xaa",
4776
      "\xd3\xa9"         => "\xd3\xa8",
4777
      "\xd3\xa7"         => "\xd3\xa6",
4778
      "\xd3\xa5"         => "\xd3\xa4",
4779
      "\xd3\xa3"         => "\xd3\xa2",
4780
      "\xd3\xa1"         => "\xd3\xa0",
4781
      "\xd3\x9f"         => "\xd3\x9e",
4782
      "\xd3\x9d"         => "\xd3\x9c",
4783
      "\xd3\x9b"         => "\xd3\x9a",
4784
      "\xd3\x99"         => "\xd3\x98",
4785
      "\xd3\x97"         => "\xd3\x96",
4786
      "\xd3\x95"         => "\xd3\x94",
4787
      "\xd3\x93"         => "\xd3\x92",
4788
      "\xd3\x91"         => "\xd3\x90",
4789
      "\xd3\x8f"         => "\xd3\x80",
4790
      "\xd3\x8e"         => "\xd3\x8d",
4791
      "\xd3\x8c"         => "\xd3\x8b",
4792
      "\xd3\x8a"         => "\xd3\x89",
4793
      "\xd3\x88"         => "\xd3\x87",
4794
      "\xd3\x86"         => "\xd3\x85",
4795
      "\xd3\x84"         => "\xd3\x83",
4796
      "\xd3\x82"         => "\xd3\x81",
4797
      "\xd2\xbf"         => "\xd2\xbe",
4798
      "\xd2\xbd"         => "\xd2\xbc",
4799
      "\xd2\xbb"         => "\xd2\xba",
4800
      "\xd2\xb9"         => "\xd2\xb8",
4801
      "\xd2\xb7"         => "\xd2\xb6",
4802
      "\xd2\xb5"         => "\xd2\xb4",
4803
      "\xd2\xb3"         => "\xd2\xb2",
4804
      "\xd2\xb1"         => "\xd2\xb0",
4805
      "\xd2\xaf"         => "\xd2\xae",
4806
      "\xd2\xad"         => "\xd2\xac",
4807
      "\xd2\xab"         => "\xd2\xaa",
4808
      "\xd2\xa9"         => "\xd2\xa8",
4809
      "\xd2\xa7"         => "\xd2\xa6",
4810
      "\xd2\xa5"         => "\xd2\xa4",
4811
      "\xd2\xa3"         => "\xd2\xa2",
4812
      "\xd2\xa1"         => "\xd2\xa0",
4813
      "\xd2\x9f"         => "\xd2\x9e",
4814
      "\xd2\x9d"         => "\xd2\x9c",
4815
      "\xd2\x9b"         => "\xd2\x9a",
4816
      "\xd2\x99"         => "\xd2\x98",
4817
      "\xd2\x97"         => "\xd2\x96",
4818
      "\xd2\x95"         => "\xd2\x94",
4819
      "\xd2\x93"         => "\xd2\x92",
4820
      "\xd2\x91"         => "\xd2\x90",
4821
      "\xd2\x8f"         => "\xd2\x8e",
4822
      "\xd2\x8d"         => "\xd2\x8c",
4823
      "\xd2\x8b"         => "\xd2\x8a",
4824
      "\xd2\x81"         => "\xd2\x80",
4825
      "\xd1\xbf"         => "\xd1\xbe",
4826
      "\xd1\xbd"         => "\xd1\xbc",
4827
      "\xd1\xbb"         => "\xd1\xba",
4828
      "\xd1\xb9"         => "\xd1\xb8",
4829
      "\xd1\xb7"         => "\xd1\xb6",
4830
      "\xd1\xb5"         => "\xd1\xb4",
4831
      "\xd1\xb3"         => "\xd1\xb2",
4832
      "\xd1\xb1"         => "\xd1\xb0",
4833
      "\xd1\xaf"         => "\xd1\xae",
4834
      "\xd1\xad"         => "\xd1\xac",
4835
      "\xd1\xab"         => "\xd1\xaa",
4836
      "\xd1\xa9"         => "\xd1\xa8",
4837
      "\xd1\xa7"         => "\xd1\xa6",
4838
      "\xd1\xa5"         => "\xd1\xa4",
4839
      "\xd1\xa3"         => "\xd1\xa2",
4840
      "\xd1\xa1"         => "\xd1\xa0",
4841
      "\xd1\x9f"         => "\xd0\x8f",
4842
      "\xd1\x9e"         => "\xd0\x8e",
4843
      "\xd1\x9d"         => "\xd0\x8d",
4844
      "\xd1\x9c"         => "\xd0\x8c",
4845
      "\xd1\x9b"         => "\xd0\x8b",
4846
      "\xd1\x9a"         => "\xd0\x8a",
4847
      "\xd1\x99"         => "\xd0\x89",
4848
      "\xd1\x98"         => "\xd0\x88",
4849
      "\xd1\x97"         => "\xd0\x87",
4850
      "\xd1\x96"         => "\xd0\x86",
4851
      "\xd1\x95"         => "\xd0\x85",
4852
      "\xd1\x94"         => "\xd0\x84",
4853
      "\xd1\x93"         => "\xd0\x83",
4854
      "\xd1\x92"         => "\xd0\x82",
4855
      "\xd1\x91"         => "\xd0\x81",
4856
      "\xd1\x90"         => "\xd0\x80",
4857
      "\xd1\x8f"         => "\xd0\xaf",
4858
      "\xd1\x8e"         => "\xd0\xae",
4859
      "\xd1\x8d"         => "\xd0\xad",
4860
      "\xd1\x8c"         => "\xd0\xac",
4861
      "\xd1\x8b"         => "\xd0\xab",
4862
      "\xd1\x8a"         => "\xd0\xaa",
4863
      "\xd1\x89"         => "\xd0\xa9",
4864
      "\xd1\x88"         => "\xd0\xa8",
4865
      "\xd1\x87"         => "\xd0\xa7",
4866
      "\xd1\x86"         => "\xd0\xa6",
4867
      "\xd1\x85"         => "\xd0\xa5",
4868
      "\xd1\x84"         => "\xd0\xa4",
4869
      "\xd1\x83"         => "\xd0\xa3",
4870
      "\xd1\x82"         => "\xd0\xa2",
4871
      "\xd1\x81"         => "\xd0\xa1",
4872
      "\xd1\x80"         => "\xd0\xa0",
4873
      "\xd0\xbf"         => "\xd0\x9f",
4874
      "\xd0\xbe"         => "\xd0\x9e",
4875
      "\xd0\xbd"         => "\xd0\x9d",
4876
      "\xd0\xbc"         => "\xd0\x9c",
4877
      "\xd0\xbb"         => "\xd0\x9b",
4878
      "\xd0\xba"         => "\xd0\x9a",
4879
      "\xd0\xb9"         => "\xd0\x99",
4880
      "\xd0\xb8"         => "\xd0\x98",
4881
      "\xd0\xb7"         => "\xd0\x97",
4882
      "\xd0\xb6"         => "\xd0\x96",
4883
      "\xd0\xb5"         => "\xd0\x95",
4884
      "\xd0\xb4"         => "\xd0\x94",
4885
      "\xd0\xb3"         => "\xd0\x93",
4886
      "\xd0\xb2"         => "\xd0\x92",
4887
      "\xd0\xb1"         => "\xd0\x91",
4888
      "\xd0\xb0"         => "\xd0\x90",
4889
      "\xcf\xbb"         => "\xcf\xba",
4890
      "\xcf\xb8"         => "\xcf\xb7",
4891
      "\xcf\xb5"         => "\xce\x95",
4892
      "\xcf\xb2"         => "\xcf\xb9",
4893
      "\xcf\xb1"         => "\xce\xa1",
4894
      "\xcf\xb0"         => "\xce\x9a",
4895
      "\xcf\xaf"         => "\xcf\xae",
4896
      "\xcf\xad"         => "\xcf\xac",
4897
      "\xcf\xab"         => "\xcf\xaa",
4898
      "\xcf\xa9"         => "\xcf\xa8",
4899
      "\xcf\xa7"         => "\xcf\xa6",
4900
      "\xcf\xa5"         => "\xcf\xa4",
4901
      "\xcf\xa3"         => "\xcf\xa2",
4902
      "\xcf\xa1"         => "\xcf\xa0",
4903
      "\xcf\x9f"         => "\xcf\x9e",
4904
      "\xcf\x9d"         => "\xcf\x9c",
4905
      "\xcf\x9b"         => "\xcf\x9a",
4906
      "\xcf\x99"         => "\xcf\x98",
4907
      "\xcf\x97"         => "\xcf\x8f",
4908
      "\xcf\x96"         => "\xce\xa0",
4909
      "\xcf\x95"         => "\xce\xa6",
4910
      "\xcf\x91"         => "\xce\x98",
4911
      "\xcf\x90"         => "\xce\x92",
4912
      "\xcf\x8e"         => "\xce\x8f",
4913
      "\xcf\x8d"         => "\xce\x8e",
4914
      "\xcf\x8c"         => "\xce\x8c",
4915
      "\xcf\x8b"         => "\xce\xab",
4916
      "\xcf\x8a"         => "\xce\xaa",
4917
      "\xcf\x89"         => "\xce\xa9",
4918
      "\xcf\x88"         => "\xce\xa8",
4919
      "\xcf\x87"         => "\xce\xa7",
4920
      "\xcf\x86"         => "\xce\xa6",
4921
      "\xcf\x85"         => "\xce\xa5",
4922
      "\xcf\x84"         => "\xce\xa4",
4923
      "\xcf\x83"         => "\xce\xa3",
4924
      "\xcf\x82"         => "\xce\xa3",
4925
      "\xcf\x81"         => "\xce\xa1",
4926
      "\xcf\x80"         => "\xce\xa0",
4927
      "\xce\xbf"         => "\xce\x9f",
4928
      "\xce\xbe"         => "\xce\x9e",
4929
      "\xce\xbd"         => "\xce\x9d",
4930
      "\xce\xbc"         => "\xce\x9c",
4931
      "\xce\xbb"         => "\xce\x9b",
4932
      "\xce\xba"         => "\xce\x9a",
4933
      "\xce\xb9"         => "\xce\x99",
4934
      "\xce\xb8"         => "\xce\x98",
4935
      "\xce\xb7"         => "\xce\x97",
4936
      "\xce\xb6"         => "\xce\x96",
4937
      "\xce\xb5"         => "\xce\x95",
4938
      "\xce\xb4"         => "\xce\x94",
4939
      "\xce\xb3"         => "\xce\x93",
4940
      "\xce\xb2"         => "\xce\x92",
4941
      "\xce\xb1"         => "\xce\x91",
4942
      "\xce\xaf"         => "\xce\x8a",
4943
      "\xce\xae"         => "\xce\x89",
4944
      "\xce\xad"         => "\xce\x88",
4945
      "\xce\xac"         => "\xce\x86",
4946
      "\xcd\xbd"         => "\xcf\xbf",
4947
      "\xcd\xbc"         => "\xcf\xbe",
4948
      "\xcd\xbb"         => "\xcf\xbd",
4949
      "\xcd\xb7"         => "\xcd\xb6",
4950
      "\xcd\xb3"         => "\xcd\xb2",
4951
      "\xcd\xb1"         => "\xcd\xb0",
4952
      "\xca\x92"         => "\xc6\xb7",
4953
      "\xca\x8c"         => "\xc9\x85",
4954
      "\xca\x8b"         => "\xc6\xb2",
4955
      "\xca\x8a"         => "\xc6\xb1",
4956
      "\xca\x89"         => "\xc9\x84",
4957
      "\xca\x88"         => "\xc6\xae",
4958
      "\xca\x83"         => "\xc6\xa9",
4959
      "\xca\x80"         => "\xc6\xa6",
4960
      "\xc9\xbd"         => "\xe2\xb1\xa4",
4961
      "\xc9\xb5"         => "\xc6\x9f",
4962
      "\xc9\xb2"         => "\xc6\x9d",
4963
      "\xc9\xb1"         => "\xe2\xb1\xae",
4964
      "\xc9\xaf"         => "\xc6\x9c",
4965
      "\xc9\xab"         => "\xe2\xb1\xa2",
4966
      "\xc9\xa9"         => "\xc6\x96",
4967
      "\xc9\xa8"         => "\xc6\x97",
4968
      "\xc9\xa5"         => "\xea\x9e\x8d",
4969
      "\xc9\xa3"         => "\xc6\x94",
4970
      "\xc9\xa0"         => "\xc6\x93",
4971
      "\xc9\x9b"         => "\xc6\x90",
4972
      "\xc9\x99"         => "\xc6\x8f",
4973
      "\xc9\x97"         => "\xc6\x8a",
4974
      "\xc9\x96"         => "\xc6\x89",
4975
      "\xc9\x94"         => "\xc6\x86",
4976
      "\xc9\x93"         => "\xc6\x81",
4977
      "\xc9\x92"         => "\xe2\xb1\xb0",
4978
      "\xc9\x91"         => "\xe2\xb1\xad",
4979
      "\xc9\x90"         => "\xe2\xb1\xaf",
4980
      "\xc9\x8f"         => "\xc9\x8e",
4981
      "\xc9\x8d"         => "\xc9\x8c",
4982
      "\xc9\x8b"         => "\xc9\x8a",
4983
      "\xc9\x89"         => "\xc9\x88",
4984
      "\xc9\x87"         => "\xc9\x86",
4985
      "\xc9\x82"         => "\xc9\x81",
4986
      "\xc9\x80"         => "\xe2\xb1\xbf",
4987
      "\xc8\xbf"         => "\xe2\xb1\xbe",
4988
      "\xc8\xbc"         => "\xc8\xbb",
4989
      "\xc8\xb3"         => "\xc8\xb2",
4990
      "\xc8\xb1"         => "\xc8\xb0",
4991
      "\xc8\xaf"         => "\xc8\xae",
4992
      "\xc8\xad"         => "\xc8\xac",
4993
      "\xc8\xab"         => "\xc8\xaa",
4994
      "\xc8\xa9"         => "\xc8\xa8",
4995
      "\xc8\xa7"         => "\xc8\xa6",
4996
      "\xc8\xa5"         => "\xc8\xa4",
4997
      "\xc8\xa3"         => "\xc8\xa2",
4998
      "\xc8\x9f"         => "\xc8\x9e",
4999
      "\xc8\x9d"         => "\xc8\x9c",
5000
      "\xc8\x9b"         => "\xc8\x9a",
5001
      "\xc8\x99"         => "\xc8\x98",
5002
      "\xc8\x97"         => "\xc8\x96",
5003
      "\xc8\x95"         => "\xc8\x94",
5004
      "\xc8\x93"         => "\xc8\x92",
5005
      "\xc8\x91"         => "\xc8\x90",
5006
      "\xc8\x8f"         => "\xc8\x8e",
5007
      "\xc8\x8d"         => "\xc8\x8c",
5008
      "\xc8\x8b"         => "\xc8\x8a",
5009
      "\xc8\x89"         => "\xc8\x88",
5010
      "\xc8\x87"         => "\xc8\x86",
5011
      "\xc8\x85"         => "\xc8\x84",
5012
      "\xc8\x83"         => "\xc8\x82",
5013
      "\xc8\x81"         => "\xc8\x80",
5014
      "\xc7\xbf"         => "\xc7\xbe",
5015
      "\xc7\xbd"         => "\xc7\xbc",
5016
      "\xc7\xbb"         => "\xc7\xba",
5017
      "\xc7\xb9"         => "\xc7\xb8",
5018
      "\xc7\xb5"         => "\xc7\xb4",
5019
      "\xc7\xb3"         => "\xc7\xb2",
5020
      "\xc7\xaf"         => "\xc7\xae",
5021
      "\xc7\xad"         => "\xc7\xac",
5022
      "\xc7\xab"         => "\xc7\xaa",
5023
      "\xc7\xa9"         => "\xc7\xa8",
5024
      "\xc7\xa7"         => "\xc7\xa6",
5025
      "\xc7\xa5"         => "\xc7\xa4",
5026
      "\xc7\xa3"         => "\xc7\xa2",
5027
      "\xc7\xa1"         => "\xc7\xa0",
5028
      "\xc7\x9f"         => "\xc7\x9e",
5029
      "\xc7\x9d"         => "\xc6\x8e",
5030
      "\xc7\x9c"         => "\xc7\x9b",
5031
      "\xc7\x9a"         => "\xc7\x99",
5032
      "\xc7\x98"         => "\xc7\x97",
5033
      "\xc7\x96"         => "\xc7\x95",
5034
      "\xc7\x94"         => "\xc7\x93",
5035
      "\xc7\x92"         => "\xc7\x91",
5036
      "\xc7\x90"         => "\xc7\x8f",
5037
      "\xc7\x8e"         => "\xc7\x8d",
5038
      "\xc7\x8c"         => "\xc7\x8b",
5039
      "\xc7\x89"         => "\xc7\x88",
5040
      "\xc7\x86"         => "\xc7\x85",
5041
      "\xc6\xbf"         => "\xc7\xb7",
5042
      "\xc6\xbd"         => "\xc6\xbc",
5043
      "\xc6\xb9"         => "\xc6\xb8",
5044
      "\xc6\xb6"         => "\xc6\xb5",
5045
      "\xc6\xb4"         => "\xc6\xb3",
5046
      "\xc6\xb0"         => "\xc6\xaf",
5047
      "\xc6\xad"         => "\xc6\xac",
5048
      "\xc6\xa8"         => "\xc6\xa7",
5049
      "\xc6\xa5"         => "\xc6\xa4",
5050
      "\xc6\xa3"         => "\xc6\xa2",
5051
      "\xc6\xa1"         => "\xc6\xa0",
5052
      "\xc6\x9e"         => "\xc8\xa0",
5053
      "\xc6\x9a"         => "\xc8\xbd",
5054
      "\xc6\x99"         => "\xc6\x98",
5055
      "\xc6\x95"         => "\xc7\xb6",
5056
      "\xc6\x92"         => "\xc6\x91",
5057
      "\xc6\x8c"         => "\xc6\x8b",
5058
      "\xc6\x88"         => "\xc6\x87",
5059
      "\xc6\x85"         => "\xc6\x84",
5060
      "\xc6\x83"         => "\xc6\x82",
5061
      "\xc6\x80"         => "\xc9\x83",
5062
      "\xc5\xbf"         => "\x53",
5063
      "\xc5\xbe"         => "\xc5\xbd",
5064
      "\xc5\xbc"         => "\xc5\xbb",
5065
      "\xc5\xba"         => "\xc5\xb9",
5066
      "\xc5\xb7"         => "\xc5\xb6",
5067
      "\xc5\xb5"         => "\xc5\xb4",
5068
      "\xc5\xb3"         => "\xc5\xb2",
5069
      "\xc5\xb1"         => "\xc5\xb0",
5070
      "\xc5\xaf"         => "\xc5\xae",
5071
      "\xc5\xad"         => "\xc5\xac",
5072
      "\xc5\xab"         => "\xc5\xaa",
5073
      "\xc5\xa9"         => "\xc5\xa8",
5074
      "\xc5\xa7"         => "\xc5\xa6",
5075
      "\xc5\xa5"         => "\xc5\xa4",
5076
      "\xc5\xa3"         => "\xc5\xa2",
5077
      "\xc5\xa1"         => "\xc5\xa0",
5078
      "\xc5\x9f"         => "\xc5\x9e",
5079
      "\xc5\x9d"         => "\xc5\x9c",
5080
      "\xc5\x9b"         => "\xc5\x9a",
5081
      "\xc5\x99"         => "\xc5\x98",
5082
      "\xc5\x97"         => "\xc5\x96",
5083
      "\xc5\x95"         => "\xc5\x94",
5084
      "\xc5\x93"         => "\xc5\x92",
5085
      "\xc5\x91"         => "\xc5\x90",
5086
      "\xc5\x8f"         => "\xc5\x8e",
5087
      "\xc5\x8d"         => "\xc5\x8c",
5088
      "\xc5\x8b"         => "\xc5\x8a",
5089
      "\xc5\x88"         => "\xc5\x87",
5090
      "\xc5\x86"         => "\xc5\x85",
5091
      "\xc5\x84"         => "\xc5\x83",
5092
      "\xc5\x82"         => "\xc5\x81",
5093
      "\xc5\x80"         => "\xc4\xbf",
5094
      "\xc4\xbe"         => "\xc4\xbd",
5095
      "\xc4\xbc"         => "\xc4\xbb",
5096
      "\xc4\xba"         => "\xc4\xb9",
5097
      "\xc4\xb7"         => "\xc4\xb6",
5098
      "\xc4\xb5"         => "\xc4\xb4",
5099
      "\xc4\xb3"         => "\xc4\xb2",
5100
      "\xc4\xb1"         => "\x49",
5101
      "\xc4\xaf"         => "\xc4\xae",
5102
      "\xc4\xad"         => "\xc4\xac",
5103
      "\xc4\xab"         => "\xc4\xaa",
5104
      "\xc4\xa9"         => "\xc4\xa8",
5105
      "\xc4\xa7"         => "\xc4\xa6",
5106
      "\xc4\xa5"         => "\xc4\xa4",
5107
      "\xc4\xa3"         => "\xc4\xa2",
5108
      "\xc4\xa1"         => "\xc4\xa0",
5109
      "\xc4\x9f"         => "\xc4\x9e",
5110
      "\xc4\x9d"         => "\xc4\x9c",
5111
      "\xc4\x9b"         => "\xc4\x9a",
5112
      "\xc4\x99"         => "\xc4\x98",
5113
      "\xc4\x97"         => "\xc4\x96",
5114
      "\xc4\x95"         => "\xc4\x94",
5115
      "\xc4\x93"         => "\xc4\x92",
5116
      "\xc4\x91"         => "\xc4\x90",
5117
      "\xc4\x8f"         => "\xc4\x8e",
5118
      "\xc4\x8d"         => "\xc4\x8c",
5119
      "\xc4\x8b"         => "\xc4\x8a",
5120
      "\xc4\x89"         => "\xc4\x88",
5121
      "\xc4\x87"         => "\xc4\x86",
5122
      "\xc4\x85"         => "\xc4\x84",
5123
      "\xc4\x83"         => "\xc4\x82",
5124
      "\xc4\x81"         => "\xc4\x80",
5125
      "\xc3\xbf"         => "\xc5\xb8",
5126
      "\xc3\xbe"         => "\xc3\x9e",
5127
      "\xc3\xbd"         => "\xc3\x9d",
5128
      "\xc3\xbc"         => "\xc3\x9c",
5129
      "\xc3\xbb"         => "\xc3\x9b",
5130
      "\xc3\xba"         => "\xc3\x9a",
5131
      "\xc3\xb9"         => "\xc3\x99",
5132
      "\xc3\xb8"         => "\xc3\x98",
5133
      "\xc3\xb6"         => "\xc3\x96",
5134
      "\xc3\xb5"         => "\xc3\x95",
5135
      "\xc3\xb4"         => "\xc3\x94",
5136
      "\xc3\xb3"         => "\xc3\x93",
5137
      "\xc3\xb2"         => "\xc3\x92",
5138
      "\xc3\xb1"         => "\xc3\x91",
5139
      "\xc3\xb0"         => "\xc3\x90",
5140
      "\xc3\xaf"         => "\xc3\x8f",
5141
      "\xc3\xae"         => "\xc3\x8e",
5142
      "\xc3\xad"         => "\xc3\x8d",
5143
      "\xc3\xac"         => "\xc3\x8c",
5144
      "\xc3\xab"         => "\xc3\x8b",
5145
      "\xc3\xaa"         => "\xc3\x8a",
5146
      "\xc3\xa9"         => "\xc3\x89",
5147
      "\xc3\xa8"         => "\xc3\x88",
5148
      "\xc3\xa7"         => "\xc3\x87",
5149
      "\xc3\xa6"         => "\xc3\x86",
5150
      "\xc3\xa5"         => "\xc3\x85",
5151
      "\xc3\xa4"         => "\xc3\x84",
5152
      "\xc3\xa3"         => "\xc3\x83",
5153
      "\xc3\xa2"         => "\xc3\x82",
5154
      "\xc3\xa1"         => "\xc3\x81",
5155
      "\xc3\xa0"         => "\xc3\x80",
5156
      "\xc2\xb5"         => "\xce\x9c",
5157
      "\x7a"             => "\x5a",
5158
      "\x79"             => "\x59",
5159
      "\x78"             => "\x58",
5160
      "\x77"             => "\x57",
5161
      "\x76"             => "\x56",
5162
      "\x75"             => "\x55",
5163
      "\x74"             => "\x54",
5164
      "\x73"             => "\x53",
5165
      "\x72"             => "\x52",
5166
      "\x71"             => "\x51",
5167
      "\x70"             => "\x50",
5168
      "\x6f"             => "\x4f",
5169
      "\x6e"             => "\x4e",
5170
      "\x6d"             => "\x4d",
5171
      "\x6c"             => "\x4c",
5172
      "\x6b"             => "\x4b",
5173
      "\x6a"             => "\x4a",
5174
      "\x69"             => "\x49",
5175
      "\x68"             => "\x48",
5176
      "\x67"             => "\x47",
5177
      "\x66"             => "\x46",
5178
      "\x65"             => "\x45",
5179
      "\x64"             => "\x44",
5180
      "\x63"             => "\x43",
5181
      "\x62"             => "\x42",
5182
      "\x61"             => "\x41",
5183
5184
    );
5185
5186
    return $case;
5187
  }
5188
5189
  /**
   * Translate characters or replace substrings (multi-byte aware variant of "strtr()").
   *
   * Two call forms are supported:
   * - strtr($s, $from, $to): $from and $to are split with self::str_split() and paired position by position;
   *   the longer list is cut down to the length of the shorter one.
   * - strtr($s, $pairs): $from is an array of "search => replacement" pairs that is handed to strtr() unchanged.
   *
   * @param string       $s    The string being translated.
   * @param string|array $from The characters to replace, or an array of replacement pairs if $to is omitted.
   * @param string       $to   The replacement characters; INF (the default) marks the argument as "not given".
   *
   * @return string The translated string; $s is returned unchanged if no usable replacement pairs are available.
   */
  public static function strtr($s, $from, $to = INF)
  {
    if (INF !== $to) {
      $from = self::str_split($from);
      $to = self::str_split($to);
      $fromCount = count($from);
      $toCount = count($to);

      // pair only as many characters as both lists provide
      if ($fromCount > $toCount) {
        $from = array_slice($from, 0, $toCount);
      } elseif ($fromCount < $toCount) {
        $to = array_slice($to, 0, $fromCount);
      }

      $from = array_combine($from, $to);
    }

    // The two-argument form of strtr() only accepts an array of replacement pairs.
    // A plain string without $to (or a failed array_combine()) carries no pairs,
    // so there is nothing to translate.
    if (!is_array($from)) {
      return (string)$s;
    }

    return strtr($s, $from);
  }
5217
5218
  /**
   * Compare a part of one string with the beginning of another string.
   *
   * The part of $main_str selected by $offset and $length is extracted with self::substr(). $str is then cut
   * down to the same number of characters (measured with self::strlen()) and both pieces are compared.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position inside $main_str, passed on to self::substr().
   * @param int     $length             The maximum length of the compared part, passed on to self::substr().
   * @param boolean $case_insensitivity If exactly TRUE, self::strcasecmp() is used, otherwise self::strcmp().
   *
   * @return int The result of the selected comparison method.
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5238
5239
  /**
   * Case-insensitive string comparison.
   *
   * Both strings are normalized with self::strtocasefold() and the results are compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int The value reported by self::strcmp() for the case-folded strings:
   *             < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5251
5252
  /**
   * Apply self::ucfirst() to every word of the string.
   *
   * Words are the pieces between single space characters (" "). Words that are listed in $exceptions
   * (compared strictly, i.e. case-sensitive) are left untouched. The joined result is passed through
   * self::ucfirst() once more, so the very first word is processed even if it is listed in $exceptions.
   *
   * @param string $string     The input string.
   * @param array  $exceptions Words that must not be changed.
   *
   * @return string The converted string, or an empty string for empty input.
   */
  public static function ucwords($string, $exceptions = array())
  {
    $string = (string)$string;

    // only a really empty string is "empty" here: "0" is falsy in PHP but still valid input
    if (!isset($string[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (explode(' ', $string) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
5293
5294
  /**
   * Format a number with grouped thousands, with support for multi-byte separators.
   *
   * If one of the separators is longer than one byte, the number is formatted with the ASCII separators
   * "." and "," first and these are swapped for the requested separators afterwards. The swap is done in a
   * single pass, so a separator that itself contains "." or "," is never translated a second time
   * (e.g. "," as decimal point together with a no-break space as thousands separator).
   *
   * @param float  $number        The number being formatted.
   * @param int    $decimals      The number of decimal digits.
   * @param string $dec_point     The separator for the decimal point.
   * @param string $thousands_sep The thousands separator.
   *
   * @return string The formatted number.
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    // isset($x[1]) is true for every separator that is longer than one byte;
    // the translation below is a plain string operation and needs no PHP version check
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
5324
5325
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: this is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  The value being searched for (the needle);
   *                       an array may be used to designate multiple needles.
   * @param mixed $replace The replacement value for the found search values;
   *                       an array may be used to designate multiple replacements.
   * @param mixed $subject The string or array being searched and replaced on (the haystack).
   *                       If it is an array, every entry is processed and an array is returned.
   * @param int   $count   [optional] If passed, this will hold the number of replacements performed.
   *
   * @return mixed A string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
5359
5360
  /**
   * Case-insensitive, UTF-8 aware variant of "str_replace()".
   *
   * Every search value is turned into a quoted, case-insensitive UTF-8 regular expression and the
   * replacing itself is done by preg_replace(). The replacement is inserted literally: sequences like
   * "$1", "\1" or "${1}" are escaped first, so they are not expanded as back-references.
   *
   * @param string|array $search  The value(s) being searched for; an empty search value never matches.
   * @param string|array $replace The replacement value(s).
   * @param string|array $subject The string or array being searched and replaced on.
   * @param int          $count   [optional] If passed, this will hold the number of replacements performed.
   *
   * @return string|array|null The result of preg_replace(), which is NULL if an error occurred
   *                           (e.g. the subject is not valid UTF-8).
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // a pattern that can never match: an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // "\" and "$" start back-references in a preg_replace() replacement, so escape them
    $escapeMap = array(
        '\\' => '\\\\',
        '$'  => '\\$',
    );

    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = strtr((string)$value, $escapeMap);
      }
    } else {
      $replacement = strtr((string)$replace, $escapeMap);
    }

    return preg_replace($patterns, $replacement, $subject, -1, $count);
  }
5388
5389
  /**
   * Make the first character of a string lowercase.
   *
   * The first character is taken with self::substr(), converted with self::strtolower() and
   * put back in front of the untouched rest of the string.
   *
   * @param string $str The input string.
   *
   * @return string The resulting string.
   */
  public static function lcfirst($str)
  {
    $head = self::strtolower(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
5400
5401
  /**
   * Find the position of the last occurrence of a case-insensitive string.
   *
   * Both strings are converted with self::strtolower() and the search itself is delegated to self::strrpos().
   *
   * @param string $haystack The string to look in.
   * @param string $needle   The string to look for.
   * @param int    $offset   (Optional) Number of characters to ignore in the beginning or end,
   *                         passed on to self::strrpos().
   *
   * @return int|false The value returned by self::strrpos(); FALSE e.g. for an empty haystack or needle.
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
5414
5415
  /**
   * Find position of last occurrence of a string in a string
   *
   * The needle is always treated as a string (it is cast before searching).
   * mbstring is used when available; without it, iconv handles searches that
   * have no offset, and a manual byte-search fallback covers the rest.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string  $haystack     <p>
   *                              The string being checked, for the last occurrence
   *                              of needle
   *                              </p>
   * @param string  $needle       <p>
   *                              The string to find in haystack.
   *                              </p>
   * @param int     $offset       [optional] May be specified to begin searching an arbitrary number of characters into
   *                              the string. Negative values will stop searching at an arbitrary point
   *                              prior to the end of the string.
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string
   *
   * @return int|false the numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found (or haystack / needle
   *                   is empty), it returns false.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be found in, or with, an empty string
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strrpos && iconv_strrpos are not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // iconv_strrpos() has no offset parameter, so it can only serve
    // offset-free searches; everything else goes through the fallback
    if ($offset === 0 && self::$support['iconv'] === true) {
      return iconv_strrpos($haystack, $needle, 'UTF-8');
    }

    // fallback: cut the haystack down to the searched window ...
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    // ... search byte-wise and convert the byte position into a character position
    if (($pos = strrpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5488
5489
  /**
   * Splits a string into smaller chunks and joins them with the given
   * line ending.
   *
   * The pieces come from UTF8::split() and are glued together with implode(),
   * so $end is placed between chunks only (none is appended after the last one).
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The chunk length handed to UTF8::split()
   * @param    string $end      The character(s) inserted between the chunks
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
5503
5504
  /**
   * Converts the given text to ISO-8859.
   *
   * This is only an alias: the call is forwarded unchanged to "UTF8::to_win1252()".
   *
   * @param   string $text The text to convert
   *
   * @return  array|string The value returned by "UTF8::to_win1252()"
   */
  public static function to_iso8859($text)
  {
    $converted = self::to_win1252($text);

    return $converted;
  }
5517
5518
  /**
   * Repairs UTF-8 text whose Windows-1252 characters were mis-converted.
   *
   * Intended for UTF-8 strings that were produced from Windows-1252 input as if
   * it had been ISO-8859-1 (i.e. ignoring the Windows-1252 characters in the
   * range 80 to 9F). See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The work is done by "UTF8::fix_simple_utf8()"; this method only forwards to it.
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $string The string to repair
   *
   * @return  string The value returned by "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($string)
  {
    $fixed = self::fix_simple_utf8($string);

    return $fixed;
  }
5535
5536
  /**
   * Returns the class-level table of Unicode white space characters.
   *
   * @return   array The static $whitespace property, documented as mapping a
   *                 numeric code point (key) to its white space character (value)
   */
  public static function ws()
  {
    $whitespaceTable = self::$whitespace;

    return $whitespaceTable;
  }
5545
5546
  /**
   * Parses the string into variables
   *
   * WARNING: This differs from parse_str() by writing the results into
   *    $result instead of placing them in the local scope!
   *
   * The input is passed through UTF8::filter() before it is handed to
   * mb_parse_str().
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string.
   *                        </p>
   * @param array  $result  <p>
   *                        Receives the parsed variables as array elements.
   *                        </p>
   *
   * @return bool The success flag reported by mb_parse_str()
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    $str = self::filter($str);

    // hand the outcome back instead of silently discarding it
    return mb_parse_str($str, $result);
  }
5573
5574
  /**
   * Get the text direction of a specific character.
   *
   * The character is converted to its code point with chr_to_decimal() and
   * looked up in the tables of right-to-left code points below; everything
   * that is not listed there is reported as left-to-right.
   *
   * @param   string $chr Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($chr)
  {
    // individual right-to-left code points (compared strictly)
    static $rtlCodePoints = array(
        0x5be,
        0x5c0,
        0x5c3,
        0x5c6,
        0x608,
        0x60b,
        0x60d,
        0x61b,
        0x710,
        0x7b1,
        0x7fa,
        0x81a,
        0x824,
        0x828,
        0x85e,
        0x200f,
        0xfb1d,
        0xfb3e,
        0x10808,
        0x1083c,
        0x1093f,
        0x10a00,
    );

    // inclusive (first, last) ranges of right-to-left code points
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    $codePoint = static::chr_to_decimal($chr);

    // everything outside the overall span of the tables is left-to-right
    if (!($codePoint >= 0x5be && $codePoint <= 0x10b7f)) {
      return 'LTR';
    }

    if (in_array($codePoint, $rtlCodePoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $codePoint && $range[1] >= $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
5673
5674
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte determines how many bytes the UTF-8 sequence has; the payload
   * bits of the lead byte and of each continuation byte are then combined into
   * the code point.
   *
   * @param   string $chr The input character
   *
   * @return  int The code point; 0 for an empty string (like native ord(''))
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    // an empty string has no lead byte to inspect
    if (!isset($chr[0])) {
      return 0;
    }

    $code = self::ord($chr[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    $bytes = 1;

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx - a missing continuation byte (truncated input) contributes
      // no bits instead of triggering an "uninitialized string offset" error
      $continuation = isset($chr[$i]) ? (self::ord($chr[$i]) & ~0x80) : 0;
      $code = ($code << 6) + $continuation;
    }

    return $code;
  }
5713
5714
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal numeric character reference
   * ("&#x...;") which mb_convert_encoding() then decodes from
   * "HTML-ENTITIES" to UTF-8.
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
5731
5732
  /**
   * Returns the lookup table "urlencoded" Windows-1252 byte => UTF-8 character.
   *
   * The keys are the percent-encoded bytes "%20" to "%FF"; the values are the
   * corresponding characters as UTF-8 text. "%80" maps to the euro sign, in
   * line with the 128 => EURO SIGN entry of the $win1252ToUtf8 table.
   *
   * NOTE(review): "%7F", "%81", "%8D", "%8F", "%90", "%9D", "%A0" and "%AD"
   * map to an empty string here - confirm that no invisible character
   * (e.g. a no-break space for "%A0") was intended.
   *
   * @return array
   */
  protected static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
5968
5969
}
5970