Completed
Push — master ( 0b8d31...8adf69 )
by Lars
04:00
created

UTF8::strlen()   C

Complexity

Conditions 8
Paths 9

Size

Total Lines 29
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 25.0063

Importance

Changes 5
Bugs 1 Features 1
Metric Value
c 5
b 1
f 1
dl 0
loc 29
ccs 5
cts 14
cp 0.357
rs 5.3846
cc 8
eloc 15
nc 9
nop 3
crap 25.0063
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Numeric code point => UTF-8 Character
83
   *
84
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
85
   *
86
   * @var array
87
   */
88
  protected static $whitespace = array(
89
    // NUL Byte
90
    0     => "\x0",
91
    // Tab
92
    9     => "\x9",
93
    // New Line
94
    10    => "\xa",
95
    // Vertical Tab
96
    11    => "\xb",
97
    // Carriage Return
98
    13    => "\xd",
99
    // Ordinary Space
100
    32    => "\x20",
101
    // NO-BREAK SPACE
102
    160   => "\xc2\xa0",
103
    // OGHAM SPACE MARK
104
    5760  => "\xe1\x9a\x80",
105
    // MONGOLIAN VOWEL SEPARATOR
106
    6158  => "\xe1\xa0\x8e",
107
    // EN QUAD
108
    8192  => "\xe2\x80\x80",
109
    // EM QUAD
110
    8193  => "\xe2\x80\x81",
111
    // EN SPACE
112
    8194  => "\xe2\x80\x82",
113
    // EM SPACE
114
    8195  => "\xe2\x80\x83",
115
    // THREE-PER-EM SPACE
116
    8196  => "\xe2\x80\x84",
117
    // FOUR-PER-EM SPACE
118
    8197  => "\xe2\x80\x85",
119
    // SIX-PER-EM SPACE
120
    8198  => "\xe2\x80\x86",
121
    // FIGURE SPACE
122
    8199  => "\xe2\x80\x87",
123
    // PUNCTUATION SPACE
124
    8200  => "\xe2\x80\x88",
125
    // THIN SPACE
126
    8201  => "\xe2\x80\x89",
127
    //HAIR SPACE
128
    8202  => "\xe2\x80\x8a",
129
    // LINE SEPARATOR
130
    8232  => "\xe2\x80\xa8",
131
    // PARAGRAPH SEPARATOR
132
    8233  => "\xe2\x80\xa9",
133
    // NARROW NO-BREAK SPACE
134
    8239  => "\xe2\x80\xaf",
135
    // MEDIUM MATHEMATICAL SPACE
136
    8287  => "\xe2\x81\x9f",
137
    // IDEOGRAPHIC SPACE
138
    12288 => "\xe3\x80\x80",
139
  );
140
141
  /**
142
   * @var array
143
   */
144
  protected static $whitespaceTable = array(
145
      'SPACE'                     => "\x20",
146
      'NO-BREAK SPACE'            => "\xc2\xa0",
147
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
148
      'EN QUAD'                   => "\xe2\x80\x80",
149
      'EM QUAD'                   => "\xe2\x80\x81",
150
      'EN SPACE'                  => "\xe2\x80\x82",
151
      'EM SPACE'                  => "\xe2\x80\x83",
152
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
153
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
154
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
155
      'FIGURE SPACE'              => "\xe2\x80\x87",
156
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
157
      'THIN SPACE'                => "\xe2\x80\x89",
158
      'HAIR SPACE'                => "\xe2\x80\x8a",
159
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
160
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
161
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
162
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
163
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
164
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
165
  );
166
167
  /**
168
   * bidirectional text chars
169
   *
170
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
171
   *
172
   * @var array
173
   */
174
  protected static $bidiUniCodeControlsTable = array(
175
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
176
    8234 => "\xE2\x80\xAA",
177
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
178
    8235 => "\xE2\x80\xAB",
179
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
180
    8236 => "\xE2\x80\xAC",
181
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
182
    8237 => "\xE2\x80\xAD",
183
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
184
    8238 => "\xE2\x80\xAE",
185
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
186
    8294 => "\xE2\x81\xA6",
187
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
188
    8295 => "\xE2\x81\xA7",
189
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
190
    8296 => "\xE2\x81\xA8",
191
    // POP DIRECTIONAL ISOLATE
192
    8297 => "\xE2\x81\xA9",
193
  );
194
195
  /**
196
   * @var array
197
   */
198
  protected static $commonCaseFold = array(
199
      'ſ'            => 's',
200
      "\xCD\x85"     => 'ι',
201
      'ς'            => 'σ',
202
      "\xCF\x90"     => 'β',
203
      "\xCF\x91"     => 'θ',
204
      "\xCF\x95"     => 'φ',
205
      "\xCF\x96"     => 'π',
206
      "\xCF\xB0"     => 'κ',
207
      "\xCF\xB1"     => 'ρ',
208
      "\xCF\xB5"     => 'ε',
209
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
210
      "\xE1\xBE\xBE" => 'ι',
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $brokenUtf8ToUtf8 = array(
217
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
218
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
219
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
220
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
221
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
222
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
223
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
224
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
225
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
226
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
227
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
228
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
229
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
230
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
231
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
232
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
233
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
234
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
235
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
236
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
237
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
238
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
239
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
240
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
241
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
242
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
243
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
244
      'ü'       => 'ü',
245
      'ä'       => 'ä',
246
      'ö'       => 'ö',
247
      'Ö'       => 'Ö',
248
      'ß'       => 'ß',
249
      'Ã '       => 'à',
250
      'á'       => 'á',
251
      'â'       => 'â',
252
      'ã'       => 'ã',
253
      'ù'       => 'ù',
254
      'ú'       => 'ú',
255
      'û'       => 'û',
256
      'Ù'       => 'Ù',
257
      'Ú'       => 'Ú',
258
      'Û'       => 'Û',
259
      'Ü'       => 'Ü',
260
      'ò'       => 'ò',
261
      'ó'       => 'ó',
262
      'ô'       => 'ô',
263
      'è'       => 'è',
264
      'é'       => 'é',
265
      'ê'       => 'ê',
266
      'ë'       => 'ë',
267
      'À'       => 'À',
268
      'Á'       => 'Á',
269
      'Â'       => 'Â',
270
      'Ã'       => 'Ã',
271
      'Ä'       => 'Ä',
272
      'Ã…'       => 'Å',
273
      'Ç'       => 'Ç',
274
      'È'       => 'È',
275
      'É'       => 'É',
276
      'Ê'       => 'Ê',
277
      'Ë'       => 'Ë',
278
      'ÃŒ'       => 'Ì',
279
      'Í'       => 'Í',
280
      'ÃŽ'       => 'Î',
281
      'Ï'       => 'Ï',
282
      'Ñ'       => 'Ñ',
283
      'Ã’'       => 'Ò',
284
      'Ó'       => 'Ó',
285
      'Ô'       => 'Ô',
286
      'Õ'       => 'Õ',
287
      'Ø'       => 'Ø',
288
      'Ã¥'       => 'å',
289
      'æ'       => 'æ',
290
      'ç'       => 'ç',
291
      'ì'       => 'ì',
292
      'í'       => 'í',
293
      'î'       => 'î',
294
      'ï'       => 'ï',
295
      'ð'       => 'ð',
296
      'ñ'       => 'ñ',
297
      'õ'       => 'õ',
298
      'ø'       => 'ø',
299
      'ý'       => 'ý',
300
      'ÿ'       => 'ÿ',
301
      '€'      => '€',
302
  );
303
304
  /**
305
   * @var array
306
   */
307
  protected static $utf8ToWin1252 = array(
308
      "\xe2\x82\xac" => "\x80", // EURO SIGN
309
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
310
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
311
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
312
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
313
      "\xe2\x80\xa0" => "\x86", // DAGGER
314
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
315
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
316
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
317
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
318
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
319
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
320
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
321
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
322
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
323
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
324
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
325
      "\xe2\x80\xa2" => "\x95", // BULLET
326
      "\xe2\x80\x93" => "\x96", // EN DASH
327
      "\xe2\x80\x94" => "\x97", // EM DASH
328
      "\xcb\x9c"     => "\x98", // SMALL TILDE
329
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
330
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
331
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
332
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
333
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
334
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
335
  );
336
337
  /**
338
   * @var array
339
   */
340
  protected static $utf8MSWord = array(
341
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
342
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
343
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
344
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
345
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
346
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
347
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
348
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
349
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
350
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
351
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
352
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
353
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
354
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
355
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
356
  );
357
358
  protected static $iconvEncoding = array(
359
      'ANSI_X3.4-1968',
360
      'ANSI_X3.4-1986',
361
      'ASCII',
362
      'CP367',
363
      'IBM367',
364
      'ISO-IR-6',
365
      'ISO646-US',
366
      'ISO_646.IRV:1991',
367
      'US',
368
      'US-ASCII',
369
      'CSASCII',
370
      'UTF-8',
371
      'ISO-10646-UCS-2',
372
      'UCS-2',
373
      'CSUNICODE',
374
      'UCS-2BE',
375
      'UNICODE-1-1',
376
      'UNICODEBIG',
377
      'CSUNICODE11',
378
      'UCS-2LE',
379
      'UNICODELITTLE',
380
      'ISO-10646-UCS-4',
381
      'UCS-4',
382
      'CSUCS4',
383
      'UCS-4BE',
384
      'UCS-4LE',
385
      'UTF-16',
386
      'UTF-16BE',
387
      'UTF-16LE',
388
      'UTF-32',
389
      'UTF-32BE',
390
      'UTF-32LE',
391
      'UNICODE-1-1-UTF-7',
392
      'UTF-7',
393
      'CSUNICODE11UTF7',
394
      'UCS-2-INTERNAL',
395
      'UCS-2-SWAPPED',
396
      'UCS-4-INTERNAL',
397
      'UCS-4-SWAPPED',
398
      'C99',
399
      'JAVA',
400
      'CP819',
401
      'IBM819',
402
      'ISO-8859-1',
403
      'ISO-IR-100',
404
      'ISO8859-1',
405
      'ISO_8859-1',
406
      'ISO_8859-1:1987',
407
      'L1',
408
      'LATIN1',
409
      'CSISOLATIN1',
410
      'ISO-8859-2',
411
      'ISO-IR-101',
412
      'ISO8859-2',
413
      'ISO_8859-2',
414
      'ISO_8859-2:1987',
415
      'L2',
416
      'LATIN2',
417
      'CSISOLATIN2',
418
      'ISO-8859-3',
419
      'ISO-IR-109',
420
      'ISO8859-3',
421
      'ISO_8859-3',
422
      'ISO_8859-3:1988',
423
      'L3',
424
      'LATIN3',
425
      'CSISOLATIN3',
426
      'ISO-8859-4',
427
      'ISO-IR-110',
428
      'ISO8859-4',
429
      'ISO_8859-4',
430
      'ISO_8859-4:1988',
431
      'L4',
432
      'LATIN4',
433
      'CSISOLATIN4',
434
      'CYRILLIC',
435
      'ISO-8859-5',
436
      'ISO-IR-144',
437
      'ISO8859-5',
438
      'ISO_8859-5',
439
      'ISO_8859-5:1988',
440
      'CSISOLATINCYRILLIC',
441
      'ARABIC',
442
      'ASMO-708',
443
      'ECMA-114',
444
      'ISO-8859-6',
445
      'ISO-IR-127',
446
      'ISO8859-6',
447
      'ISO_8859-6',
448
      'ISO_8859-6:1987',
449
      'CSISOLATINARABIC',
450
      'ECMA-118',
451
      'ELOT_928',
452
      'GREEK',
453
      'GREEK8',
454
      'ISO-8859-7',
455
      'ISO-IR-126',
456
      'ISO8859-7',
457
      'ISO_8859-7',
458
      'ISO_8859-7:1987',
459
      'ISO_8859-7:2003',
460
      'CSISOLATINGREEK',
461
      'HEBREW',
462
      'ISO-8859-8',
463
      'ISO-IR-138',
464
      'ISO8859-8',
465
      'ISO_8859-8',
466
      'ISO_8859-8:1988',
467
      'CSISOLATINHEBREW',
468
      'ISO-8859-9',
469
      'ISO-IR-148',
470
      'ISO8859-9',
471
      'ISO_8859-9',
472
      'ISO_8859-9:1989',
473
      'L5',
474
      'LATIN5',
475
      'CSISOLATIN5',
476
      'ISO-8859-10',
477
      'ISO-IR-157',
478
      'ISO8859-10',
479
      'ISO_8859-10',
480
      'ISO_8859-10:1992',
481
      'L6',
482
      'LATIN6',
483
      'CSISOLATIN6',
484
      'ISO-8859-11',
485
      'ISO8859-11',
486
      'ISO_8859-11',
487
      'ISO-8859-13',
488
      'ISO-IR-179',
489
      'ISO8859-13',
490
      'ISO_8859-13',
491
      'L7',
492
      'LATIN7',
493
      'ISO-8859-14',
494
      'ISO-CELTIC',
495
      'ISO-IR-199',
496
      'ISO8859-14',
497
      'ISO_8859-14',
498
      'ISO_8859-14:1998',
499
      'L8',
500
      'LATIN8',
501
      'ISO-8859-15',
502
      'ISO-IR-203',
503
      'ISO8859-15',
504
      'ISO_8859-15',
505
      'ISO_8859-15:1998',
506
      'LATIN-9',
507
      'ISO-8859-16',
508
      'ISO-IR-226',
509
      'ISO8859-16',
510
      'ISO_8859-16',
511
      'ISO_8859-16:2001',
512
      'L10',
513
      'LATIN10',
514
      'KOI8-R',
515
      'CSKOI8R',
516
      'KOI8-U',
517
      'KOI8-RU',
518
      'CP1250',
519
      'MS-EE',
520
      'WINDOWS-1250',
521
      'CP1251',
522
      'MS-CYRL',
523
      'WINDOWS-1251',
524
      'CP1252',
525
      'MS-ANSI',
526
      'WINDOWS-1252',
527
      'CP1253',
528
      'MS-GREEK',
529
      'WINDOWS-1253',
530
      'CP1254',
531
      'MS-TURK',
532
      'WINDOWS-1254',
533
      'CP1255',
534
      'MS-HEBR',
535
      'WINDOWS-1255',
536
      'CP1256',
537
      'MS-ARAB',
538
      'WINDOWS-1256',
539
      'CP1257',
540
      'WINBALTRIM',
541
      'WINDOWS-1257',
542
      'CP1258',
543
      'WINDOWS-1258',
544
      '850',
545
      'CP850',
546
      'IBM850',
547
      'CSPC850MULTILINGUAL',
548
      '862',
549
      'CP862',
550
      'IBM862',
551
      'CSPC862LATINHEBREW',
552
      '866',
553
      'CP866',
554
      'IBM866',
555
      'CSIBM866',
556
      'MAC',
557
      'MACINTOSH',
558
      'MACROMAN',
559
      'CSMACINTOSH',
560
      'MACCENTRALEUROPE',
561
      'MACICELAND',
562
      'MACCROATIAN',
563
      'MACROMANIA',
564
      'MACCYRILLIC',
565
      'MACUKRAINE',
566
      'MACGREEK',
567
      'MACTURKISH',
568
      'MACHEBREW',
569
      'MACARABIC',
570
      'MACTHAI',
571
      'HP-ROMAN8',
572
      'R8',
573
      'ROMAN8',
574
      'CSHPROMAN8',
575
      'NEXTSTEP',
576
      'ARMSCII-8',
577
      'GEORGIAN-ACADEMY',
578
      'GEORGIAN-PS',
579
      'KOI8-T',
580
      'CP154',
581
      'CYRILLIC-ASIAN',
582
      'PT154',
583
      'PTCP154',
584
      'CSPTCP154',
585
      'KZ-1048',
586
      'RK1048',
587
      'STRK1048-2002',
588
      'CSKZ1048',
589
      'MULELAO-1',
590
      'CP1133',
591
      'IBM-CP1133',
592
      'ISO-IR-166',
593
      'TIS-620',
594
      'TIS620',
595
      'TIS620-0',
596
      'TIS620.2529-1',
597
      'TIS620.2533-0',
598
      'TIS620.2533-1',
599
      'CP874',
600
      'WINDOWS-874',
601
      'VISCII',
602
      'VISCII1.1-1',
603
      'CSVISCII',
604
      'TCVN',
605
      'TCVN-5712',
606
      'TCVN5712-1',
607
      'TCVN5712-1:1993',
608
      'ISO-IR-14',
609
      'ISO646-JP',
610
      'JIS_C6220-1969-RO',
611
      'JP',
612
      'CSISO14JISC6220RO',
613
      'JISX0201-1976',
614
      'JIS_X0201',
615
      'X0201',
616
      'CSHALFWIDTHKATAKANA',
617
      'ISO-IR-87',
618
      'JIS0208',
619
      'JIS_C6226-1983',
620
      'JIS_X0208',
621
      'JIS_X0208-1983',
622
      'JIS_X0208-1990',
623
      'X0208',
624
      'CSISO87JISX0208',
625
      'ISO-IR-159',
626
      'JIS_X0212',
627
      'JIS_X0212-1990',
628
      'JIS_X0212.1990-0',
629
      'X0212',
630
      'CSISO159JISX02121990',
631
      'CN',
632
      'GB_1988-80',
633
      'ISO-IR-57',
634
      'ISO646-CN',
635
      'CSISO57GB1988',
636
      'CHINESE',
637
      'GB_2312-80',
638
      'ISO-IR-58',
639
      'CSISO58GB231280',
640
      'CN-GB-ISOIR165',
641
      'ISO-IR-165',
642
      'ISO-IR-149',
643
      'KOREAN',
644
      'KSC_5601',
645
      'KS_C_5601-1987',
646
      'KS_C_5601-1989',
647
      'CSKSC56011987',
648
      'EUC-JP',
649
      'EUCJP',
650
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
651
      'CSEUCPKDFMTJAPANESE',
652
      'MS_KANJI',
653
      'SHIFT-JIS',
654
      'SHIFT_JIS',
655
      'SJIS',
656
      'CSSHIFTJIS',
657
      'CP932',
658
      'ISO-2022-JP',
659
      'CSISO2022JP',
660
      'ISO-2022-JP-1',
661
      'ISO-2022-JP-2',
662
      'CSISO2022JP2',
663
      'CN-GB',
664
      'EUC-CN',
665
      'EUCCN',
666
      'GB2312',
667
      'CSGB2312',
668
      'GBK',
669
      'CP936',
670
      'MS936',
671
      'WINDOWS-936',
672
      'GB18030',
673
      'ISO-2022-CN',
674
      'CSISO2022CN',
675
      'ISO-2022-CN-EXT',
676
      'HZ',
677
      'HZ-GB-2312',
678
      'EUC-TW',
679
      'EUCTW',
680
      'CSEUCTW',
681
      'BIG-5',
682
      'BIG-FIVE',
683
      'BIG5',
684
      'BIGFIVE',
685
      'CN-BIG5',
686
      'CSBIG5',
687
      'CP950',
688
      'BIG5-HKSCS:1999',
689
      'BIG5-HKSCS:2001',
690
      'BIG5-HKSCS',
691
      'BIG5-HKSCS:2004',
692
      'BIG5HKSCS',
693
      'EUC-KR',
694
      'EUCKR',
695
      'CSEUCKR',
696
      'CP949',
697
      'UHC',
698
      'CP1361',
699
      'JOHAB',
700
      'ISO-2022-KR',
701
      'CSISO2022KR',
702
      'CP856',
703
      'CP922',
704
      'CP943',
705
      'CP1046',
706
      'CP1124',
707
      'CP1129',
708
      'CP1161',
709
      'IBM-1161',
710
      'IBM1161',
711
      'CSIBM1161',
712
      'CP1162',
713
      'IBM-1162',
714
      'IBM1162',
715
      'CSIBM1162',
716
      'CP1163',
717
      'IBM-1163',
718
      'IBM1163',
719
      'CSIBM1163',
720
      'DEC-KANJI',
721
      'DEC-HANYU',
722
      '437',
723
      'CP437',
724
      'IBM437',
725
      'CSPC8CODEPAGE437',
726
      'CP737',
727
      'CP775',
728
      'IBM775',
729
      'CSPC775BALTIC',
730
      '852',
731
      'CP852',
732
      'IBM852',
733
      'CSPCP852',
734
      'CP853',
735
      '855',
736
      'CP855',
737
      'IBM855',
738
      'CSIBM855',
739
      '857',
740
      'CP857',
741
      'IBM857',
742
      'CSIBM857',
743
      'CP858',
744
      '860',
745
      'CP860',
746
      'IBM860',
747
      'CSIBM860',
748
      '861',
749
      'CP-IS',
750
      'CP861',
751
      'IBM861',
752
      'CSIBM861',
753
      '863',
754
      'CP863',
755
      'IBM863',
756
      'CSIBM863',
757
      'CP864',
758
      'IBM864',
759
      'CSIBM864',
760
      '865',
761
      'CP865',
762
      'IBM865',
763
      'CSIBM865',
764
      '869',
765
      'CP-GR',
766
      'CP869',
767
      'IBM869',
768
      'CSIBM869',
769
      'CP1125',
770
      'EUC-JISX0213',
771
      'SHIFT_JISX0213',
772
      'ISO-2022-JP-3',
773
      'BIG5-2003',
774
      'ISO-IR-230',
775
      'TDS565',
776
      'ATARI',
777
      'ATARIST',
778
      'RISCOS-LATIN1',
779
  );
780
781
  /**
782
   * @var array
783
   */
784
  private static $support = array();
785
786
  /**
   * Constructor.
   *
   * Runs the class-level checkForSupport() routine every time an instance
   * is created.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
793 1
794
  /**
   * Fetch the single UTF-8 character located at a given position.
   *
   * Offers $str[1]-like access by delegating to self::substr() with a
   * fixed length of 1.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of the character to return.
   *
   * @return   string Single multi-byte character.
   */
  public static function access($str, $pos)
  {
    $length = 1;
    $character = self::substr($str, $pos, $length);

    return $character;
  }
808
809
  /**
   * Ensure the given string starts with a byte order mark.
   *
   * The first three bytes of the input are handed to self::is_bom(). When
   * that check succeeds the input is returned untouched; otherwise the
   * value of self::bom() is prepended.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that contains BOM
   */
  public static function add_bom_to_string($str)
  {
    // Byte-wise substr() on purpose: the mark returned by self::bom() is three bytes long.
    $leadingBytes = substr($str, 0, 3);

    if (self::is_bom($leadingBytes)) {
      return $str;
    }

    return self::bom() . $str;
  }
826
827
  /**
   * Provide the three-byte sequence EF BB BF used as the byte order mark.
   *
   * @return   string Byte Order Mark
   */
  public static function bom()
  {
    $byteOrderMark = "\xEF\xBB\xBF";

    return $byteOrderMark;
  }
836
837
  /**
   * Alias of UTF8::chr_map().
   *
   * Both arguments are forwarded unchanged to self::chr_map() and its
   * result is returned as-is.
   *
   * @param $callback forwarded as the first argument of chr_map()
   * @param $str      forwarded as the second argument of chr_map()
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
849
850
  /**
851
   * Returns an array of all lower and upper case UTF-8 encoded characters.
852
   *
853
   * @return   array An array with lower case chars as keys and upper chars as values.
854
   */
855
  protected static function case_table()
856
  {
857
    static $case = array(
858
859
      // lower => upper
860
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
861
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
862
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
863
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
864
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
865
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
866
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
867
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
868
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
869
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
870
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
871
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
872
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
873
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
874
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
875
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
876
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
877
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
878
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
879
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
880
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
881
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
882
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
883
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
884
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
885
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
886
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
887
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
888
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
889
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
890
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
891
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
892
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
893
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
894
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
895
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
896
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
897
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
898
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
899
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
900
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
901
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
902
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
903
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
904
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
905
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
906
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
907
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
908
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
909
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
910
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
911
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
912
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
913
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
914
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
915
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
916
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
917
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
918
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
919
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
920
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
921
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
922
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
923
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
924
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
925
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
926
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
927
      "\xea\x9e\x87"     => "\xea\x9e\x86",
928
      "\xea\x9e\x85"     => "\xea\x9e\x84",
929
      "\xea\x9e\x83"     => "\xea\x9e\x82",
930
      "\xea\x9e\x81"     => "\xea\x9e\x80",
931
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
932
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
933
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
934
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
935
      "\xea\x9d\xad"     => "\xea\x9d\xac",
936
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
937
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
938
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
939
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
940
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
941
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
942
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
943
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
944
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
945
      "\xea\x9d\x99"     => "\xea\x9d\x98",
946
      "\xea\x9d\x97"     => "\xea\x9d\x96",
947
      "\xea\x9d\x95"     => "\xea\x9d\x94",
948
      "\xea\x9d\x93"     => "\xea\x9d\x92",
949
      "\xea\x9d\x91"     => "\xea\x9d\x90",
950
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
951
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
952
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
953
      "\xea\x9d\x89"     => "\xea\x9d\x88",
954
      "\xea\x9d\x87"     => "\xea\x9d\x86",
955
      "\xea\x9d\x85"     => "\xea\x9d\x84",
956
      "\xea\x9d\x83"     => "\xea\x9d\x82",
957
      "\xea\x9d\x81"     => "\xea\x9d\x80",
958
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
959
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
960
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
961
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
962
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
963
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
964
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
965
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
966
      "\xea\x9c\xad"     => "\xea\x9c\xac",
967
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
968
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
969
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
970
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
971
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
972
      "\xea\x9a\x97"     => "\xea\x9a\x96",
973
      "\xea\x9a\x95"     => "\xea\x9a\x94",
974
      "\xea\x9a\x93"     => "\xea\x9a\x92",
975
      "\xea\x9a\x91"     => "\xea\x9a\x90",
976
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
977
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
978
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
979
      "\xea\x9a\x89"     => "\xea\x9a\x88",
980
      "\xea\x9a\x87"     => "\xea\x9a\x86",
981
      "\xea\x9a\x85"     => "\xea\x9a\x84",
982
      "\xea\x9a\x83"     => "\xea\x9a\x82",
983
      "\xea\x9a\x81"     => "\xea\x9a\x80",
984
      "\xea\x99\xad"     => "\xea\x99\xac",
985
      "\xea\x99\xab"     => "\xea\x99\xaa",
986
      "\xea\x99\xa9"     => "\xea\x99\xa8",
987
      "\xea\x99\xa7"     => "\xea\x99\xa6",
988
      "\xea\x99\xa5"     => "\xea\x99\xa4",
989
      "\xea\x99\xa3"     => "\xea\x99\xa2",
990
      "\xea\x99\x9f"     => "\xea\x99\x9e",
991
      "\xea\x99\x9d"     => "\xea\x99\x9c",
992
      "\xea\x99\x9b"     => "\xea\x99\x9a",
993
      "\xea\x99\x99"     => "\xea\x99\x98",
994
      "\xea\x99\x97"     => "\xea\x99\x96",
995
      "\xea\x99\x95"     => "\xea\x99\x94",
996
      "\xea\x99\x93"     => "\xea\x99\x92",
997
      "\xea\x99\x91"     => "\xea\x99\x90",
998
      "\xea\x99\x8f"     => "\xea\x99\x8e",
999
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1000
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1001
      "\xea\x99\x89"     => "\xea\x99\x88",
1002
      "\xea\x99\x87"     => "\xea\x99\x86",
1003
      "\xea\x99\x85"     => "\xea\x99\x84",
1004
      "\xea\x99\x83"     => "\xea\x99\x82",
1005
      "\xea\x99\x81"     => "\xea\x99\x80",
1006
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1007
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1008
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1009
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1010
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1011
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1012
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1013
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1014
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1015
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1016
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1017
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1018
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1019
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1020
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1021
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1022
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1023
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1024
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1025
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1026
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1027
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1028
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1029
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1030
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1031
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1032
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1033
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1034
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1035
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1036
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1037
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1038
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1039
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1040
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1041
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1042
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1043
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1044
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1045
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1046
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1047
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1048
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1049
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1050
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1051
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1052
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1053
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1054
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1055
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1056
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1057
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1058
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1059
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1060
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1061
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1062
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1063
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1064
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1065
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1066
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1067
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1068
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1069
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1070
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1071
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1072
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1073
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1074
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1075
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1076
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1077
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1078
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1079
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1080
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1081
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1082
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1083
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1084
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1085
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1086
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1087
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1088
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1089
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1090
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1091
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1092
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1093
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1094
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1095
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1096
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1097
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1098
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1099
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1100
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1101
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1102
      "\xe2\xb1\xa5"     => "\xc8\xba",
1103
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1104
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1105
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1106
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1107
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1108
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1109
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1110
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1111
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1112
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1113
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1114
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1115
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1116
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1117
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1118
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1119
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1120
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1121
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1122
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1123
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1124
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1125
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1126
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1127
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1128
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1129
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1130
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1131
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1132
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1133
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1134
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1135
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1136
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1137
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1138
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1139
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1140
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1141
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1142
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1143
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1144
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1145
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1146
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1147
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1148
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1149
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1150
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1151
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1152
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1153
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1154
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1155
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1156
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1157
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1158
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1159
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1160
      "\xe1\xbe\xbe"     => "\xce\x99",
1161
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1162
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1163
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1164
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1165
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1166
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1167
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1168
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1169
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1170
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1171
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1172
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1173
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1174
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1175
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1176
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1177
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1178
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1179
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1180
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1181
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1182
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1183
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1184
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1185
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1186
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1187
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1188
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1189
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1190
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1191
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1192
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1193
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1194
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1195
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1196
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1197
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1198
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1199
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1200
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1201
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1202
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1203
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1204
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1205
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1206
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1207
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1208
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1209
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1210
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1211
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1212
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1213
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1214
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1215
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1216
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1217
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1218
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1219
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1220
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1221
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1222
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1223
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1224
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1225
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1226
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1227
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1228
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1229
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1230
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1231
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1232
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1233
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1234
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1235
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1236
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1237
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1238
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1239
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1240
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1241
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1242
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1243
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1244
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1245
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1246
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1247
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1248
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1249
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1250
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1251
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1252
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1253
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1254
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1255
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1256
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1257
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1258
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1259
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1260
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1261
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1262
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1263
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1264
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1265
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1266
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1267
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1268
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1269
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1270
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1271
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1272
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1273
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1274
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1275
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1276
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1277
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1278
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1279
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1280
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1281
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1282
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1283
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1284
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1285
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1286
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1287
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1288
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1289
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1290
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1291
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1292
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1293
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1294
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1295
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1296
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1297
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1298
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1299
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1300
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1301
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1302
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1303
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1304
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1305
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1306
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1307
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1308
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1309
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1310
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1311
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1312
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1313
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1314
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1315
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1316
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1317
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1318
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1319
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1320
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1321
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1322
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1323
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1324
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1325
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1326
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1327
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1328
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1329
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1330
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1331
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1332
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1333
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1334
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1335
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1336
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1337
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1338
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1339
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1340
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1341
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1342
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1343
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1344
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1345
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1346
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1347
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1348
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1349
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1350
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1351
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1352
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1353
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1354
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1355
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1356
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1357
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1358
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1359
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1360
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1361
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1362
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1363
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1364
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1365
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1366
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1367
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1368
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1369
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1370
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1371
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1372
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1373
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1374
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1375
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1376
      "\xd6\x86"         => "\xd5\x96",
1377
      "\xd6\x85"         => "\xd5\x95",
1378
      "\xd6\x84"         => "\xd5\x94",
1379
      "\xd6\x83"         => "\xd5\x93",
1380
      "\xd6\x82"         => "\xd5\x92",
1381
      "\xd6\x81"         => "\xd5\x91",
1382
      "\xd6\x80"         => "\xd5\x90",
1383
      "\xd5\xbf"         => "\xd5\x8f",
1384
      "\xd5\xbe"         => "\xd5\x8e",
1385
      "\xd5\xbd"         => "\xd5\x8d",
1386
      "\xd5\xbc"         => "\xd5\x8c",
1387
      "\xd5\xbb"         => "\xd5\x8b",
1388
      "\xd5\xba"         => "\xd5\x8a",
1389
      "\xd5\xb9"         => "\xd5\x89",
1390
      "\xd5\xb8"         => "\xd5\x88",
1391
      "\xd5\xb7"         => "\xd5\x87",
1392
      "\xd5\xb6"         => "\xd5\x86",
1393
      "\xd5\xb5"         => "\xd5\x85",
1394
      "\xd5\xb4"         => "\xd5\x84",
1395
      "\xd5\xb3"         => "\xd5\x83",
1396
      "\xd5\xb2"         => "\xd5\x82",
1397
      "\xd5\xb1"         => "\xd5\x81",
1398
      "\xd5\xb0"         => "\xd5\x80",
1399
      "\xd5\xaf"         => "\xd4\xbf",
1400
      "\xd5\xae"         => "\xd4\xbe",
1401
      "\xd5\xad"         => "\xd4\xbd",
1402
      "\xd5\xac"         => "\xd4\xbc",
1403
      "\xd5\xab"         => "\xd4\xbb",
1404
      "\xd5\xaa"         => "\xd4\xba",
1405
      "\xd5\xa9"         => "\xd4\xb9",
1406
      "\xd5\xa8"         => "\xd4\xb8",
1407
      "\xd5\xa7"         => "\xd4\xb7",
1408
      "\xd5\xa6"         => "\xd4\xb6",
1409
      "\xd5\xa5"         => "\xd4\xb5",
1410
      "\xd5\xa4"         => "\xd4\xb4",
1411
      "\xd5\xa3"         => "\xd4\xb3",
1412
      "\xd5\xa2"         => "\xd4\xb2",
1413
      "\xd5\xa1"         => "\xd4\xb1",
1414
      "\xd4\xa5"         => "\xd4\xa4",
1415
      "\xd4\xa3"         => "\xd4\xa2",
1416
      "\xd4\xa1"         => "\xd4\xa0",
1417
      "\xd4\x9f"         => "\xd4\x9e",
1418
      "\xd4\x9d"         => "\xd4\x9c",
1419
      "\xd4\x9b"         => "\xd4\x9a",
1420
      "\xd4\x99"         => "\xd4\x98",
1421
      "\xd4\x97"         => "\xd4\x96",
1422
      "\xd4\x95"         => "\xd4\x94",
1423
      "\xd4\x93"         => "\xd4\x92",
1424
      "\xd4\x91"         => "\xd4\x90",
1425
      "\xd4\x8f"         => "\xd4\x8e",
1426
      "\xd4\x8d"         => "\xd4\x8c",
1427
      "\xd4\x8b"         => "\xd4\x8a",
1428
      "\xd4\x89"         => "\xd4\x88",
1429
      "\xd4\x87"         => "\xd4\x86",
1430
      "\xd4\x85"         => "\xd4\x84",
1431
      "\xd4\x83"         => "\xd4\x82",
1432
      "\xd4\x81"         => "\xd4\x80",
1433
      "\xd3\xbf"         => "\xd3\xbe",
1434
      "\xd3\xbd"         => "\xd3\xbc",
1435
      "\xd3\xbb"         => "\xd3\xba",
1436
      "\xd3\xb9"         => "\xd3\xb8",
1437
      "\xd3\xb7"         => "\xd3\xb6",
1438
      "\xd3\xb5"         => "\xd3\xb4",
1439
      "\xd3\xb3"         => "\xd3\xb2",
1440
      "\xd3\xb1"         => "\xd3\xb0",
1441
      "\xd3\xaf"         => "\xd3\xae",
1442
      "\xd3\xad"         => "\xd3\xac",
1443
      "\xd3\xab"         => "\xd3\xaa",
1444
      "\xd3\xa9"         => "\xd3\xa8",
1445
      "\xd3\xa7"         => "\xd3\xa6",
1446
      "\xd3\xa5"         => "\xd3\xa4",
1447
      "\xd3\xa3"         => "\xd3\xa2",
1448
      "\xd3\xa1"         => "\xd3\xa0",
1449
      "\xd3\x9f"         => "\xd3\x9e",
1450
      "\xd3\x9d"         => "\xd3\x9c",
1451
      "\xd3\x9b"         => "\xd3\x9a",
1452
      "\xd3\x99"         => "\xd3\x98",
1453
      "\xd3\x97"         => "\xd3\x96",
1454
      "\xd3\x95"         => "\xd3\x94",
1455
      "\xd3\x93"         => "\xd3\x92",
1456
      "\xd3\x91"         => "\xd3\x90",
1457
      "\xd3\x8f"         => "\xd3\x80",
1458
      "\xd3\x8e"         => "\xd3\x8d",
1459
      "\xd3\x8c"         => "\xd3\x8b",
1460
      "\xd3\x8a"         => "\xd3\x89",
1461
      "\xd3\x88"         => "\xd3\x87",
1462
      "\xd3\x86"         => "\xd3\x85",
1463
      "\xd3\x84"         => "\xd3\x83",
1464
      "\xd3\x82"         => "\xd3\x81",
1465
      "\xd2\xbf"         => "\xd2\xbe",
1466
      "\xd2\xbd"         => "\xd2\xbc",
1467
      "\xd2\xbb"         => "\xd2\xba",
1468
      "\xd2\xb9"         => "\xd2\xb8",
1469
      "\xd2\xb7"         => "\xd2\xb6",
1470
      "\xd2\xb5"         => "\xd2\xb4",
1471
      "\xd2\xb3"         => "\xd2\xb2",
1472
      "\xd2\xb1"         => "\xd2\xb0",
1473
      "\xd2\xaf"         => "\xd2\xae",
1474
      "\xd2\xad"         => "\xd2\xac",
1475
      "\xd2\xab"         => "\xd2\xaa",
1476
      "\xd2\xa9"         => "\xd2\xa8",
1477
      "\xd2\xa7"         => "\xd2\xa6",
1478
      "\xd2\xa5"         => "\xd2\xa4",
1479
      "\xd2\xa3"         => "\xd2\xa2",
1480
      "\xd2\xa1"         => "\xd2\xa0",
1481
      "\xd2\x9f"         => "\xd2\x9e",
1482
      "\xd2\x9d"         => "\xd2\x9c",
1483
      "\xd2\x9b"         => "\xd2\x9a",
1484
      "\xd2\x99"         => "\xd2\x98",
1485
      "\xd2\x97"         => "\xd2\x96",
1486
      "\xd2\x95"         => "\xd2\x94",
1487
      "\xd2\x93"         => "\xd2\x92",
1488
      "\xd2\x91"         => "\xd2\x90",
1489
      "\xd2\x8f"         => "\xd2\x8e",
1490
      "\xd2\x8d"         => "\xd2\x8c",
1491
      "\xd2\x8b"         => "\xd2\x8a",
1492
      "\xd2\x81"         => "\xd2\x80",
1493
      "\xd1\xbf"         => "\xd1\xbe",
1494
      "\xd1\xbd"         => "\xd1\xbc",
1495
      "\xd1\xbb"         => "\xd1\xba",
1496
      "\xd1\xb9"         => "\xd1\xb8",
1497
      "\xd1\xb7"         => "\xd1\xb6",
1498
      "\xd1\xb5"         => "\xd1\xb4",
1499
      "\xd1\xb3"         => "\xd1\xb2",
1500
      "\xd1\xb1"         => "\xd1\xb0",
1501
      "\xd1\xaf"         => "\xd1\xae",
1502
      "\xd1\xad"         => "\xd1\xac",
1503
      "\xd1\xab"         => "\xd1\xaa",
1504
      "\xd1\xa9"         => "\xd1\xa8",
1505
      "\xd1\xa7"         => "\xd1\xa6",
1506
      "\xd1\xa5"         => "\xd1\xa4",
1507
      "\xd1\xa3"         => "\xd1\xa2",
1508
      "\xd1\xa1"         => "\xd1\xa0",
1509
      "\xd1\x9f"         => "\xd0\x8f",
1510
      "\xd1\x9e"         => "\xd0\x8e",
1511
      "\xd1\x9d"         => "\xd0\x8d",
1512
      "\xd1\x9c"         => "\xd0\x8c",
1513
      "\xd1\x9b"         => "\xd0\x8b",
1514
      "\xd1\x9a"         => "\xd0\x8a",
1515
      "\xd1\x99"         => "\xd0\x89",
1516
      "\xd1\x98"         => "\xd0\x88",
1517
      "\xd1\x97"         => "\xd0\x87",
1518
      "\xd1\x96"         => "\xd0\x86",
1519
      "\xd1\x95"         => "\xd0\x85",
1520
      "\xd1\x94"         => "\xd0\x84",
1521
      "\xd1\x93"         => "\xd0\x83",
1522
      "\xd1\x92"         => "\xd0\x82",
1523
      "\xd1\x91"         => "\xd0\x81",
1524
      "\xd1\x90"         => "\xd0\x80",
1525
      "\xd1\x8f"         => "\xd0\xaf",
1526
      "\xd1\x8e"         => "\xd0\xae",
1527
      "\xd1\x8d"         => "\xd0\xad",
1528
      "\xd1\x8c"         => "\xd0\xac",
1529
      "\xd1\x8b"         => "\xd0\xab",
1530
      "\xd1\x8a"         => "\xd0\xaa",
1531
      "\xd1\x89"         => "\xd0\xa9",
1532
      "\xd1\x88"         => "\xd0\xa8",
1533
      "\xd1\x87"         => "\xd0\xa7",
1534
      "\xd1\x86"         => "\xd0\xa6",
1535
      "\xd1\x85"         => "\xd0\xa5",
1536
      "\xd1\x84"         => "\xd0\xa4",
1537
      "\xd1\x83"         => "\xd0\xa3",
1538
      "\xd1\x82"         => "\xd0\xa2",
1539
      "\xd1\x81"         => "\xd0\xa1",
1540
      "\xd1\x80"         => "\xd0\xa0",
1541
      "\xd0\xbf"         => "\xd0\x9f",
1542
      "\xd0\xbe"         => "\xd0\x9e",
1543
      "\xd0\xbd"         => "\xd0\x9d",
1544
      "\xd0\xbc"         => "\xd0\x9c",
1545
      "\xd0\xbb"         => "\xd0\x9b",
1546
      "\xd0\xba"         => "\xd0\x9a",
1547
      "\xd0\xb9"         => "\xd0\x99",
1548
      "\xd0\xb8"         => "\xd0\x98",
1549
      "\xd0\xb7"         => "\xd0\x97",
1550
      "\xd0\xb6"         => "\xd0\x96",
1551
      "\xd0\xb5"         => "\xd0\x95",
1552
      "\xd0\xb4"         => "\xd0\x94",
1553
      "\xd0\xb3"         => "\xd0\x93",
1554
      "\xd0\xb2"         => "\xd0\x92",
1555
      "\xd0\xb1"         => "\xd0\x91",
1556
      "\xd0\xb0"         => "\xd0\x90",
1557
      "\xcf\xbb"         => "\xcf\xba",
1558
      "\xcf\xb8"         => "\xcf\xb7",
1559
      "\xcf\xb5"         => "\xce\x95",
1560
      "\xcf\xb2"         => "\xcf\xb9",
1561
      "\xcf\xb1"         => "\xce\xa1",
1562
      "\xcf\xb0"         => "\xce\x9a",
1563
      "\xcf\xaf"         => "\xcf\xae",
1564
      "\xcf\xad"         => "\xcf\xac",
1565
      "\xcf\xab"         => "\xcf\xaa",
1566
      "\xcf\xa9"         => "\xcf\xa8",
1567
      "\xcf\xa7"         => "\xcf\xa6",
1568
      "\xcf\xa5"         => "\xcf\xa4",
1569
      "\xcf\xa3"         => "\xcf\xa2",
1570
      "\xcf\xa1"         => "\xcf\xa0",
1571
      "\xcf\x9f"         => "\xcf\x9e",
1572
      "\xcf\x9d"         => "\xcf\x9c",
1573
      "\xcf\x9b"         => "\xcf\x9a",
1574
      "\xcf\x99"         => "\xcf\x98",
1575
      "\xcf\x97"         => "\xcf\x8f",
1576
      "\xcf\x96"         => "\xce\xa0",
1577
      "\xcf\x95"         => "\xce\xa6",
1578
      "\xcf\x91"         => "\xce\x98",
1579
      "\xcf\x90"         => "\xce\x92",
1580
      "\xcf\x8e"         => "\xce\x8f",
1581
      "\xcf\x8d"         => "\xce\x8e",
1582
      "\xcf\x8c"         => "\xce\x8c",
1583
      "\xcf\x8b"         => "\xce\xab",
1584
      "\xcf\x8a"         => "\xce\xaa",
1585
      "\xcf\x89"         => "\xce\xa9",
1586
      "\xcf\x88"         => "\xce\xa8",
1587
      "\xcf\x87"         => "\xce\xa7",
1588
      "\xcf\x86"         => "\xce\xa6",
1589
      "\xcf\x85"         => "\xce\xa5",
1590
      "\xcf\x84"         => "\xce\xa4",
1591
      "\xcf\x83"         => "\xce\xa3",
1592
      "\xcf\x82"         => "\xce\xa3",
1593
      "\xcf\x81"         => "\xce\xa1",
1594
      "\xcf\x80"         => "\xce\xa0",
1595
      "\xce\xbf"         => "\xce\x9f",
1596
      "\xce\xbe"         => "\xce\x9e",
1597
      "\xce\xbd"         => "\xce\x9d",
1598
      "\xce\xbc"         => "\xce\x9c",
1599
      "\xce\xbb"         => "\xce\x9b",
1600
      "\xce\xba"         => "\xce\x9a",
1601
      "\xce\xb9"         => "\xce\x99",
1602
      "\xce\xb8"         => "\xce\x98",
1603
      "\xce\xb7"         => "\xce\x97",
1604
      "\xce\xb6"         => "\xce\x96",
1605
      "\xce\xb5"         => "\xce\x95",
1606
      "\xce\xb4"         => "\xce\x94",
1607
      "\xce\xb3"         => "\xce\x93",
1608
      "\xce\xb2"         => "\xce\x92",
1609
      "\xce\xb1"         => "\xce\x91",
1610
      "\xce\xaf"         => "\xce\x8a",
1611
      "\xce\xae"         => "\xce\x89",
1612
      "\xce\xad"         => "\xce\x88",
1613
      "\xce\xac"         => "\xce\x86",
1614
      "\xcd\xbd"         => "\xcf\xbf",
1615
      "\xcd\xbc"         => "\xcf\xbe",
1616
      "\xcd\xbb"         => "\xcf\xbd",
1617
      "\xcd\xb7"         => "\xcd\xb6",
1618
      "\xcd\xb3"         => "\xcd\xb2",
1619
      "\xcd\xb1"         => "\xcd\xb0",
1620
      "\xca\x92"         => "\xc6\xb7",
1621
      "\xca\x8c"         => "\xc9\x85",
1622
      "\xca\x8b"         => "\xc6\xb2",
1623
      "\xca\x8a"         => "\xc6\xb1",
1624
      "\xca\x89"         => "\xc9\x84",
1625
      "\xca\x88"         => "\xc6\xae",
1626
      "\xca\x83"         => "\xc6\xa9",
1627
      "\xca\x80"         => "\xc6\xa6",
1628
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1629
      "\xc9\xb5"         => "\xc6\x9f",
1630
      "\xc9\xb2"         => "\xc6\x9d",
1631
      "\xc9\xb1"         => "\xe2\xb1\xae",
1632
      "\xc9\xaf"         => "\xc6\x9c",
1633
      "\xc9\xab"         => "\xe2\xb1\xa2",
1634
      "\xc9\xa9"         => "\xc6\x96",
1635
      "\xc9\xa8"         => "\xc6\x97",
1636
      "\xc9\xa5"         => "\xea\x9e\x8d",
1637
      "\xc9\xa3"         => "\xc6\x94",
1638
      "\xc9\xa0"         => "\xc6\x93",
1639
      "\xc9\x9b"         => "\xc6\x90",
1640
      "\xc9\x99"         => "\xc6\x8f",
1641
      "\xc9\x97"         => "\xc6\x8a",
1642
      "\xc9\x96"         => "\xc6\x89",
1643
      "\xc9\x94"         => "\xc6\x86",
1644
      "\xc9\x93"         => "\xc6\x81",
1645
      "\xc9\x92"         => "\xe2\xb1\xb0",
1646
      "\xc9\x91"         => "\xe2\xb1\xad",
1647
      "\xc9\x90"         => "\xe2\xb1\xaf",
1648
      "\xc9\x8f"         => "\xc9\x8e",
1649
      "\xc9\x8d"         => "\xc9\x8c",
1650
      "\xc9\x8b"         => "\xc9\x8a",
1651
      "\xc9\x89"         => "\xc9\x88",
1652
      "\xc9\x87"         => "\xc9\x86",
1653
      "\xc9\x82"         => "\xc9\x81",
1654
      "\xc9\x80"         => "\xe2\xb1\xbf",
1655
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1656
      "\xc8\xbc"         => "\xc8\xbb",
1657
      "\xc8\xb3"         => "\xc8\xb2",
1658
      "\xc8\xb1"         => "\xc8\xb0",
1659
      "\xc8\xaf"         => "\xc8\xae",
1660
      "\xc8\xad"         => "\xc8\xac",
1661
      "\xc8\xab"         => "\xc8\xaa",
1662
      "\xc8\xa9"         => "\xc8\xa8",
1663
      "\xc8\xa7"         => "\xc8\xa6",
1664
      "\xc8\xa5"         => "\xc8\xa4",
1665
      "\xc8\xa3"         => "\xc8\xa2",
1666
      "\xc8\x9f"         => "\xc8\x9e",
1667
      "\xc8\x9d"         => "\xc8\x9c",
1668
      "\xc8\x9b"         => "\xc8\x9a",
1669
      "\xc8\x99"         => "\xc8\x98",
1670
      "\xc8\x97"         => "\xc8\x96",
1671
      "\xc8\x95"         => "\xc8\x94",
1672
      "\xc8\x93"         => "\xc8\x92",
1673
      "\xc8\x91"         => "\xc8\x90",
1674
      "\xc8\x8f"         => "\xc8\x8e",
1675
      "\xc8\x8d"         => "\xc8\x8c",
1676
      "\xc8\x8b"         => "\xc8\x8a",
1677
      "\xc8\x89"         => "\xc8\x88",
1678
      "\xc8\x87"         => "\xc8\x86",
1679
      "\xc8\x85"         => "\xc8\x84",
1680
      "\xc8\x83"         => "\xc8\x82",
1681
      "\xc8\x81"         => "\xc8\x80",
1682
      "\xc7\xbf"         => "\xc7\xbe",
1683
      "\xc7\xbd"         => "\xc7\xbc",
1684
      "\xc7\xbb"         => "\xc7\xba",
1685
      "\xc7\xb9"         => "\xc7\xb8",
1686
      "\xc7\xb5"         => "\xc7\xb4",
1687
      "\xc7\xb3"         => "\xc7\xb2",
1688
      "\xc7\xaf"         => "\xc7\xae",
1689
      "\xc7\xad"         => "\xc7\xac",
1690
      "\xc7\xab"         => "\xc7\xaa",
1691
      "\xc7\xa9"         => "\xc7\xa8",
1692
      "\xc7\xa7"         => "\xc7\xa6",
1693
      "\xc7\xa5"         => "\xc7\xa4",
1694
      "\xc7\xa3"         => "\xc7\xa2",
1695
      "\xc7\xa1"         => "\xc7\xa0",
1696
      "\xc7\x9f"         => "\xc7\x9e",
1697
      "\xc7\x9d"         => "\xc6\x8e",
1698
      "\xc7\x9c"         => "\xc7\x9b",
1699
      "\xc7\x9a"         => "\xc7\x99",
1700
      "\xc7\x98"         => "\xc7\x97",
1701
      "\xc7\x96"         => "\xc7\x95",
1702
      "\xc7\x94"         => "\xc7\x93",
1703
      "\xc7\x92"         => "\xc7\x91",
1704
      "\xc7\x90"         => "\xc7\x8f",
1705
      "\xc7\x8e"         => "\xc7\x8d",
1706
      "\xc7\x8c"         => "\xc7\x8b",
1707
      "\xc7\x89"         => "\xc7\x88",
1708
      "\xc7\x86"         => "\xc7\x85",
1709
      "\xc6\xbf"         => "\xc7\xb7",
1710
      "\xc6\xbd"         => "\xc6\xbc",
1711
      "\xc6\xb9"         => "\xc6\xb8",
1712
      "\xc6\xb6"         => "\xc6\xb5",
1713
      "\xc6\xb4"         => "\xc6\xb3",
1714
      "\xc6\xb0"         => "\xc6\xaf",
1715
      "\xc6\xad"         => "\xc6\xac",
1716
      "\xc6\xa8"         => "\xc6\xa7",
1717
      "\xc6\xa5"         => "\xc6\xa4",
1718
      "\xc6\xa3"         => "\xc6\xa2",
1719
      "\xc6\xa1"         => "\xc6\xa0",
1720
      "\xc6\x9e"         => "\xc8\xa0",
1721
      "\xc6\x9a"         => "\xc8\xbd",
1722
      "\xc6\x99"         => "\xc6\x98",
1723
      "\xc6\x95"         => "\xc7\xb6",
1724
      "\xc6\x92"         => "\xc6\x91",
1725
      "\xc6\x8c"         => "\xc6\x8b",
1726
      "\xc6\x88"         => "\xc6\x87",
1727
      "\xc6\x85"         => "\xc6\x84",
1728
      "\xc6\x83"         => "\xc6\x82",
1729
      "\xc6\x80"         => "\xc9\x83",
1730
      "\xc5\xbf"         => "\x53",
1731
      "\xc5\xbe"         => "\xc5\xbd",
1732
      "\xc5\xbc"         => "\xc5\xbb",
1733
      "\xc5\xba"         => "\xc5\xb9",
1734
      "\xc5\xb7"         => "\xc5\xb6",
1735
      "\xc5\xb5"         => "\xc5\xb4",
1736
      "\xc5\xb3"         => "\xc5\xb2",
1737
      "\xc5\xb1"         => "\xc5\xb0",
1738
      "\xc5\xaf"         => "\xc5\xae",
1739
      "\xc5\xad"         => "\xc5\xac",
1740
      "\xc5\xab"         => "\xc5\xaa",
1741
      "\xc5\xa9"         => "\xc5\xa8",
1742
      "\xc5\xa7"         => "\xc5\xa6",
1743
      "\xc5\xa5"         => "\xc5\xa4",
1744
      "\xc5\xa3"         => "\xc5\xa2",
1745
      "\xc5\xa1"         => "\xc5\xa0",
1746
      "\xc5\x9f"         => "\xc5\x9e",
1747
      "\xc5\x9d"         => "\xc5\x9c",
1748
      "\xc5\x9b"         => "\xc5\x9a",
1749
      "\xc5\x99"         => "\xc5\x98",
1750
      "\xc5\x97"         => "\xc5\x96",
1751
      "\xc5\x95"         => "\xc5\x94",
1752
      "\xc5\x93"         => "\xc5\x92",
1753
      "\xc5\x91"         => "\xc5\x90",
1754
      "\xc5\x8f"         => "\xc5\x8e",
1755
      "\xc5\x8d"         => "\xc5\x8c",
1756
      "\xc5\x8b"         => "\xc5\x8a",
1757
      "\xc5\x88"         => "\xc5\x87",
1758
      "\xc5\x86"         => "\xc5\x85",
1759
      "\xc5\x84"         => "\xc5\x83",
1760
      "\xc5\x82"         => "\xc5\x81",
1761
      "\xc5\x80"         => "\xc4\xbf",
1762
      "\xc4\xbe"         => "\xc4\xbd",
1763
      "\xc4\xbc"         => "\xc4\xbb",
1764
      "\xc4\xba"         => "\xc4\xb9",
1765
      "\xc4\xb7"         => "\xc4\xb6",
1766
      "\xc4\xb5"         => "\xc4\xb4",
1767
      "\xc4\xb3"         => "\xc4\xb2",
1768
      "\xc4\xb1"         => "\x49",
1769
      "\xc4\xaf"         => "\xc4\xae",
1770
      "\xc4\xad"         => "\xc4\xac",
1771
      "\xc4\xab"         => "\xc4\xaa",
1772
      "\xc4\xa9"         => "\xc4\xa8",
1773
      "\xc4\xa7"         => "\xc4\xa6",
1774
      "\xc4\xa5"         => "\xc4\xa4",
1775
      "\xc4\xa3"         => "\xc4\xa2",
1776
      "\xc4\xa1"         => "\xc4\xa0",
1777
      "\xc4\x9f"         => "\xc4\x9e",
1778
      "\xc4\x9d"         => "\xc4\x9c",
1779
      "\xc4\x9b"         => "\xc4\x9a",
1780
      "\xc4\x99"         => "\xc4\x98",
1781
      "\xc4\x97"         => "\xc4\x96",
1782
      "\xc4\x95"         => "\xc4\x94",
1783
      "\xc4\x93"         => "\xc4\x92",
1784
      "\xc4\x91"         => "\xc4\x90",
1785
      "\xc4\x8f"         => "\xc4\x8e",
1786
      "\xc4\x8d"         => "\xc4\x8c",
1787
      "\xc4\x8b"         => "\xc4\x8a",
1788
      "\xc4\x89"         => "\xc4\x88",
1789
      "\xc4\x87"         => "\xc4\x86",
1790
      "\xc4\x85"         => "\xc4\x84",
1791
      "\xc4\x83"         => "\xc4\x82",
1792
      "\xc4\x81"         => "\xc4\x80",
1793
      "\xc3\xbf"         => "\xc5\xb8",
1794
      "\xc3\xbe"         => "\xc3\x9e",
1795
      "\xc3\xbd"         => "\xc3\x9d",
1796
      "\xc3\xbc"         => "\xc3\x9c",
1797
      "\xc3\xbb"         => "\xc3\x9b",
1798
      "\xc3\xba"         => "\xc3\x9a",
1799
      "\xc3\xb9"         => "\xc3\x99",
1800
      "\xc3\xb8"         => "\xc3\x98",
1801
      "\xc3\xb6"         => "\xc3\x96",
1802
      "\xc3\xb5"         => "\xc3\x95",
1803
      "\xc3\xb4"         => "\xc3\x94",
1804
      "\xc3\xb3"         => "\xc3\x93",
1805
      "\xc3\xb2"         => "\xc3\x92",
1806
      "\xc3\xb1"         => "\xc3\x91",
1807
      "\xc3\xb0"         => "\xc3\x90",
1808
      "\xc3\xaf"         => "\xc3\x8f",
1809
      "\xc3\xae"         => "\xc3\x8e",
1810
      "\xc3\xad"         => "\xc3\x8d",
1811
      "\xc3\xac"         => "\xc3\x8c",
1812
      "\xc3\xab"         => "\xc3\x8b",
1813
      "\xc3\xaa"         => "\xc3\x8a",
1814
      "\xc3\xa9"         => "\xc3\x89",
1815
      "\xc3\xa8"         => "\xc3\x88",
1816
      "\xc3\xa7"         => "\xc3\x87",
1817
      "\xc3\xa6"         => "\xc3\x86",
1818
      "\xc3\xa5"         => "\xc3\x85",
1819
      "\xc3\xa4"         => "\xc3\x84",
1820
      "\xc3\xa3"         => "\xc3\x83",
1821
      "\xc3\xa2"         => "\xc3\x82",
1822
      "\xc3\xa1"         => "\xc3\x81",
1823
      "\xc3\xa0"         => "\xc3\x80",
1824
      "\xc2\xb5"         => "\xce\x9c",
1825
      "\x7a"             => "\x5a",
1826
      "\x79"             => "\x59",
1827
      "\x78"             => "\x58",
1828
      "\x77"             => "\x57",
1829
      "\x76"             => "\x56",
1830
      "\x75"             => "\x55",
1831
      "\x74"             => "\x54",
1832
      "\x73"             => "\x53",
1833
      "\x72"             => "\x52",
1834
      "\x71"             => "\x51",
1835
      "\x70"             => "\x50",
1836
      "\x6f"             => "\x4f",
1837
      "\x6e"             => "\x4e",
1838
      "\x6d"             => "\x4d",
1839
      "\x6c"             => "\x4c",
1840
      "\x6b"             => "\x4b",
1841
      "\x6a"             => "\x4a",
1842
      "\x69"             => "\x49",
1843
      "\x68"             => "\x48",
1844
      "\x67"             => "\x47",
1845
      "\x66"             => "\x46",
1846
      "\x65"             => "\x45",
1847
      "\x64"             => "\x44",
1848
      "\x63"             => "\x43",
1849
      "\x62"             => "\x42",
1850
      "\x61"             => "\x41",
1851
1852
    );
1853
1854
    return $case;
1855
  }
1856
1857
  /**
   * Detect the available UTF-8 related features once and cache the result.
   *
   * The "mbstring" key of self::$support is used as the "already checked" marker;
   * the remaining keys are filled together with it in this method.
   */
  public static function checkForSupport()
  {
    // nothing to do when an earlier call already populated the cache
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1871
1872
  /**
   * Build the UTF-8 encoded character for a code point.
   *
   * When self::$support flags IntlChar as available, the raw $code_point is handed
   * to \IntlChar::chr(). Otherwise an input that is not already an integer is run
   * through self::hex_to_int() and the character is produced by decoding the
   * numeric HTML entity of the resulting number.
   *
   * @param    int $code_point The code point for which to generate a character.
   *
   * @return   string Multi-Byte character; on the fallback path an empty string is
   *                  returned when the code point resolves to 0.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $number = (int)$code_point;

    if (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // a strict mismatch means the input was not an int (e.g. a string)
    if ($number !== $code_point) {
      $number = (int)self::hex_to_int($code_point);
    }

    return $number ? self::html_entity_decode("&#{$number};", ENT_QUOTES) : '';
  }
1899
1900
  /**
   * Run a callback over every character of a string.
   *
   * The string is broken up with self::split() and the pieces are passed to array_map().
   *
   * @param    string $callback The callback handed to array_map().
   * @param    string $str      UTF-8 string to run callback on.
   *
   * @return   array The callback results, one entry per character.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1915
1916
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character; an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // test for "no first byte" instead of truthiness, so that the string "0" is
    // measured like any other one-character string
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1936
1937 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are read (1 to 4); the payload bits of
   * the lead byte and of each following byte are combined into one integer.
   *
   * @param   string $chr The input character
   *
   * @return  int The decoded value; 0 for an empty input.
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    // an empty string has no offset 0; bail out before indexing into it
    if (!isset($chr[0])) {
      return 0;
    }

    $code = self::ord($chr[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx: shift the collected bits and append this byte without its high bit
      $code = ($code << 6) + (self::ord($chr[$i - 1]) & ~0x80);
    }

    return $code;
  }
1976
1977
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The character is converted with self::ord() and the result is formatted by
   * self::int_to_hex() using the given prefix.
   *
   * @param    string $chr  The input character
   * @param    string $pfix Prefix handed to self::int_to_hex()
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $code_point = self::ord($chr);

    return self::int_to_hex($code_point, $pfix);
  }
1989
1990
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from self::split() and are glued with implode(), so $end is
   * placed between chunks only; nothing is appended after the last chunk.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
2004
2005
  /**
   * Accepts a string and removes all non-UTF-8 characters from it.
   *
   * After the byte-level filtering, the replacement character and invisible
   * characters are removed; the remaining steps are optional and switched on by
   * passing exactly true for the matching flag.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to strip the BOM via self::removeBOM()
   * @param bool   $normalize_whitespace    set true, to run self::normalize_whitespace()
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings
    // NOTE(review): the {1,100} bound below presumably relates to that problem - confirm

    $utf8_pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // every match is replaced by its first group: valid runs stay as they are,
    // a stray byte caught by one of the other two groups is replaced by nothing
    $str = preg_replace($utf8_pattern, '$1', $str);

    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
2051
2052 3
  /**
   * Clean up a string so that only printable UTF-8 chars are left at the end.
   *
   * @param string $str
   *
   * @return string The cleaned string; an empty string for empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // first fix simple ISO <-> UTF-8 errors ...
    $fixed = self::fix_simple_utf8($str);

    // ... then let clean() do the rest:
    //  - remove all non UTF-8 symbols
    //  - remove the diamond question mark (�)
    //  - remove invisible characters (e.g. "\0")
    //  - remove the BOM
    //  - normalize whitespace chars (but keep non-breaking-spaces)
    return (string)self::clean($fixed, true, true, false, true);
  }
2079
2080
  /**
   * Accepts a string and returns an array of Unicode code points.
   *
   * A string is split into characters with self::split() first; any other input
   * is handed to array_map() as it is.
   *
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool  $u_style If True, will return code points in U+xxxx format,
   *                          default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $class = '\\voku\\helper\\UTF8';

    $chars = is_string($arg) ? self::split($arg) : $arg;

    $points = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    // second pass: format every integer with int_to_hex() (default prefix)
    return array_map(array($class, 'int_to_hex'), $points);
  }
2115
2116
  /**
   * Returns count of characters used in a string.
   *
   * Unlike the native count_chars() there is no $mode parameter.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array of Character as keys and
   *           their count as values, sorted by key.
   */
  public static function count_chars($str)
  {
    $occurrences = array_count_values(self::split($str));

    ksort($occurrences);

    return $occurrences;
  }
2132
2133 1
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity, which mb_convert_encoding()
   * then converts from "HTML-ENTITIES" to UTF-8.
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2150
2151
  /**
   * Encode a string.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The input is returned as it is when the string or the encoding is empty, when no
   * encoding could be detected, or when $force is not true and the detected encoding
   * already equals the requested one.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
   *                         otherwise we auto-detect the current string-encoding
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if ($str === '' || $encoding === '') {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);
    $forced = ($force === true);

    // no usable detection result, or nothing to do
    if (!$encodingDetected || (!$forced && $encodingDetected === $encoding)) {
      return $str;
    }

    self::checkForSupport();

    if ($encoding === 'UTF-8') {
      if ($forced || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)) {
        return self::to_utf8($str);
      }
    } elseif ($encoding === 'ISO-8859-1') {
      if ($forced || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)) {
        return self::to_win1252($str);
      }
    }

    // generic conversion for every other combination
    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // an empty / falsy conversion result falls back to the input string
    return $strEncoded ? $strEncoded : $str;
  }
2225
2226
  /**
   * Callback function for preg_replace_callback use.
   *
   * Decodes the matched HTML entity to UTF-8; a decoded single quote is returned
   * as the entity "&#x27;" instead of the raw character.
   *
   * @param  array $matches PREG matches
   *
   * @return string
   */
  protected static function entityCallback($matches)
  {
    self::checkForSupport();

    $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    return $decoded === "'" ? '&#x27;' : $decoded;
  }
2245
2246
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use the UTF-8 option for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string   $filename      Name of the file to read. The value is passed through
   *                                filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   * @param int      $flags         [optional] Forwarded (cast to bool) as the second argument
   *                                of the native file_get_contents(); a truthy value such as
   *                                FILE_USE_INCLUDE_PATH enables the search in the include path.
   * @param resource $context       [optional] A valid context resource created with
   *                                stream_context_create(). When it is null and $timeout is not 0,
   *                                a context with the "http" => "timeout" option is created.
   * @param int      $offset        [optional] The offset where the reading starts; null is treated as 0.
   * @param int      $maxlen        [optional] Maximum length of data read. It is only forwarded when
   *                                it is an integer; otherwise the file is read until its end.
   * @param int      $timeout       Timeout (cast to int) for the automatically created context.
   *
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non
   *                                default utf-8 chars. Only an exact true enables the conversion.
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;

    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and alters
    // quotes / tags inside the path; kept here to preserve the existing behavior.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // The native function declares a bool and an int parameter here; passing the
    // null defaults of this wrapper through is deprecated since PHP 8.1, so they
    // are normalized explicitly (same values the implicit coercion produced).
    $use_include_path = (bool)$flags;
    $offset = (int)$offset;

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $use_include_path, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // convert to UTF-8 (with auto-detection of the source encoding), then clean the result
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
2366
2367
  /**
2368
   * Checks if a file starts with BOM character.
2369
   *
2370
   * @param    string $file_path Path to a valid file.
2371
   *
2372
   * @return   bool True if the file has BOM at the start, False otherwise.
2373
   */
2374
  public static function file_has_bom($file_path)
  {
    // Read just the first three bytes of the file. The offset must be 0:
    // since PHP 7.1 a negative offset is counted from the end of the stream,
    // so the former "-1" inspected the last byte instead of the start.
    $head = file_get_contents($file_path, false, null, 0, 3);

    // A file that cannot be read cannot be reported as starting with a BOM.
    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
2378
2379
  /**
2380
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2381
   *
2382
   * @param mixed  $var
2383
   * @param int    $normalization_form
2384
   * @param string $leading_combining
2385
   *
2386
   * @return mixed
2387
   */
2388
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $type = gettype($var);

    // Arrays: filter every element recursively.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property in place.
    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->$property = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Everything that is not a string is handed back untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Strings without any high byte need no normalization.
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty placeholder, so that the isset() check below passes
      // for input that was already normalized.
      $normalized = '-';
    } else {
      $normalized = \Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // Normalization produced nothing usable: re-encode the input instead.
        $var = self::encode('UTF-8', $var);
      }
    }

    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
2431
2432
  /**
2433
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2434
   *
2435
   * @param int    $type
2436
   * @param string $var
2437 1
   * @param int    $filter
2438
   * @param mixed  $option
2439 1
   *
2440 1
   * @return mixed
2441 1
   */
2442 1 View Code Duplication
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only when the caller really supplied it; otherwise
    // the native function is called without that argument.
    $value = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($value);
  }
2452
2453
  /**
2454
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2455
   *
2456
   * @param int   $type
2457 1
   * @param mixed $definition
2458
   * @param bool  $add_empty
2459 1
   *
2460
   * @return mixed
2461
   */
2462 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With only $type given, the native function is called without the
    // optional arguments.
    if (func_num_args() >= 2) {
      $values = filter_input_array($type, $definition, $add_empty);
    } else {
      $values = filter_input_array($type);
    }

    return self::filter($values);
  }
2472 8
2473
  /**
2474 8
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2475
   *
2476 8
   * @param mixed $var
2477 2
   * @param int   $filter
2478
   * @param mixed $option
2479
   *
2480 8
   * @return mixed
2481 1
   */
2482 1 View Code Duplication
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only when the caller really supplied it.
    $filtered = func_num_args() < 3
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($filtered);
  }
2492
2493
  /**
2494
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2495 1
   *
2496
   * @param array $data
2497 1
   * @param mixed $definition
2498
   * @param bool  $add_empty
2499
   *
2500
   * @return mixed
2501
   */
2502 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // A definition was supplied: pass all three arguments through.
    if (func_num_args() >= 2) {
      return self::filter(filter_var_array($data, $definition, $add_empty));
    }

    // Only the data was supplied: call the native function without the
    // optional arguments.
    return self::filter(filter_var_array($data));
  }
2512
2513 1
  /**
2514
   * Checks if the number of Unicode characters in a string is not
2515
   * more than the specified integer.
2516
   *
2517
   * @param    string $str      The original string to be checked.
2518
   * @param    int    $box_size The size in number of chars to be checked against string.
2519
   *
2520
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
2521
   */
2522
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by this class's own strlen().
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2526
2527 1
  /**
2528 1
   * Fixing a broken UTF-8 string.
2529
   *
2530
   * @param string $str
2531 1
   *
2532
   * @return string
2533 1
   */
2534 1
  public static function fix_simple_utf8($str)
  {
    // Search/replace lists, split once from self::$brokenUtf8ToUtf8 and
    // reused by later calls.
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2552
2553
  /**
2554
   * Fix a double (or multiple) encoded UTF8 string.
2555
   *
2556
   * @param array|string $str
2557
   *
2558
   * @return array|string
2559
   */
2560
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (recursively), keys are kept.
    if (is_array($str)) {
      foreach (array_keys($str) as $key) {
        $str[$key] = self::fix_utf8($str[$key]);
      }

      return $str;
    }

    // Decode and re-encode until a pass no longer changes the value.
    for ($previous = ''; $previous !== $str;) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($str));
    }

    return $str;
  }
2580
2581
  /**
2582
   * Get the text direction (LTR or RTL) of a specific character.
2583
   *
2584
   * @param   string $char Character.
2585
   *
2586
   * @return  string 'RTL' or 'LTR'
2587
   */
2588
  public static function getCharDirection($char)
  {
    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // Direction codes, numbered as in the "IntlChar"-Class.
      $ltrCodes = array(0, 11, 12, 20);
      $rtlCodes = array(1, 13, 14, 15, 21);

      if (in_array($direction, $ltrCodes, true)) {
        return 'LTR';
      }

      if (in_array($direction, $rtlCodes, true)) {
        return 'RTL';
      }

      // Any other code falls through to the table lookup below.
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of the overall table bounds is left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Pick the table section for the code point. "$points" are matched
    // strictly (===), "$ranges" are inclusive [from, to] pairs.
    if (0x85e >= $c) {
      $points = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );
    } elseif (0x200f === $c) {
      return 'RTL';
    } elseif (0xfb1d <= $c) {
      $points = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );
    } else {
      // Between the two table sections (and not 0x200f).
      return 'LTR';
    }

    if (in_array($c, $points, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2699
2700
  /**
2701
   * Get data from "/data/*.php".
2702
   *
2703
   * @param string $file
2704
   *
2705
   * @return bool|string|array|int false on error
2706
   */
2707
  protected static function getData($file)
  {
    // Resolve the name to a PHP file in the "data" directory next to this class.
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    // The data file hands back its content via "return".
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2717 1
2718 1
  /**
2719
   * Creates a random string of UTF-8 characters.
2720
   *
2721
   * @param    int $len The length of string in characters.
2722
   *
2723
   * @return   string String consisting of random characters.
2724
   */
2725
  public static function hash($len = 8)
  {
    // Alphabet and its size, built on the first call and reused afterwards.
    static $chars = array();
    static $chars_len = null;

    // Work on an integer: the loop below counts down until it hits exactly 0,
    // so a fractional length (e.g. 2.5) would never terminate.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (!$chars) {
      if (self::$support['pcre_utf8'] === true) {
        $candidates = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // Blank out everything that is neither a number nor a cased letter ...
        $candidates = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $candidates);

        // ... and drop the blanked entries. The comparison is explicit because
        // a bare array_filter() would also discard the digit "0".
        $chars = array();
        foreach ($candidates as $candidate) {
          if ($candidate !== '') {
            $chars[] = $candidate;
          }
        }
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    // NOTE: mt_rand() is not a cryptographically secure source of randomness.
    for (; $len; --$len) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
2765
2766
  /**
2767
   * Converts hexadecimal U+xxxx code point representation to Integer.
2768
   *
2769
   * INFO: opposite to UTF8::int_to_hex( )
2770
   *
2771
   * @param    string $str The hexadecimal code point representation.
2772
   *
2773
   * @return   int The code point, or 0 on failure.
2774
   */
2775
  public static function hex_to_int($str)
  {
    // Optional "\u" or "U+" prefix, followed by 4 to 6 hexadecimal digits.
    // Only 0-9 and a-f are accepted: intval() stops at the first non-hex
    // character, so letters beyond "f" used to yield a truncated code point
    // instead of the documented failure value 0.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2783
2784
  /**
2785
   * Converts a UTF-8 string to a series of HTML numbered entities.
2786
   *
2787
   * e.g.: &#123;&#39;&#1740;
2788
   *
2789
   * @param  string $str The Unicode string to be encoded as numbered entities.
2790 15
   *
2791
   * @return string HTML numbered entities.
2792 15
   */
2793
  public static function html_encode($str)
  {
    // Split the input into pieces, encode every piece on its own ...
    $pieces = self::split($str);

    $entities = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'single_chr_html_encode',
        ),
        $pieces
    );

    // ... and glue the results back together without a separator.
    return implode('', $entities);
  }
2805 3
2806
  /**
2807
   * UTF-8 version of html_entity_decode()
2808 3
   *
2809
   * The reason we are not using html_entity_decode() by itself is because
2810
   * while it is not technically correct to leave out the semicolon
2811 15
   * at the end of an entity most browsers will still interpret the entity
2812
   * correctly. html_entity_decode() does not convert entities without
2813 15
   * semicolons, so we are left with our own little solution here. Bummer.
2814
   *
2815
   * Convert all HTML entities to their applicable characters
2816 15
   *
2817 15
   * @link http://php.net/manual/en/function.html-entity-decode.php
2818 15
   *
2819
   * @param string $str      <p>
2820 15
   *                         The input string.
2821
   *                         </p>
2822 15
   * @param int    $flags    [optional] <p>
2823
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2824 15
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2825
   *                         <table>
2826
   *                         Available <i>flags</i> constants
2827
   *                         <tr valign="top">
2828
   *                         <td>Constant Name</td>
2829
   *                         <td>Description</td>
2830
   *                         </tr>
2831
   *                         <tr valign="top">
2832
   *                         <td><b>ENT_COMPAT</b></td>
2833
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2834 12
   *                         </tr>
2835
   *                         <tr valign="top">
2836 12
   *                         <td><b>ENT_QUOTES</b></td>
2837
   *                         <td>Will convert both double and single quotes.</td>
2838 12
   *                         </tr>
2839
   *                         <tr valign="top">
2840 12
   *                         <td><b>ENT_NOQUOTES</b></td>
2841 5
   *                         <td>Will leave both double and single quotes unconverted.</td>
2842
   *                         </tr>
2843
   *                         <tr valign="top">
2844 11
   *                         <td><b>ENT_HTML401</b></td>
2845
   *                         <td>
2846
   *                         Handle code as HTML 4.01.
2847
   *                         </td>
2848
   *                         </tr>
2849
   *                         <tr valign="top">
2850
   *                         <td><b>ENT_XML1</b></td>
2851
   *                         <td>
2852
   *                         Handle code as XML 1.
2853
   *                         </td>
2854
   *                         </tr>
2855
   *                         <tr valign="top">
2856
   *                         <td><b>ENT_XHTML</b></td>
2857
   *                         <td>
2858
   *                         Handle code as XHTML.
2859
   *                         </td>
2860
   *                         </tr>
2861
   *                         <tr valign="top">
2862
   *                         <td><b>ENT_HTML5</b></td>
2863
   *                         <td>
2864
   *                         Handle code as HTML 5.
2865
   *                         </td>
2866
   *                         </tr>
2867
   *                         </table>
2868
   *                         </p>
2869
   * @param string $encoding [optional] <p>
2870
   *                         Encoding to use.
2871
   *                         </p>
2872
   *
2873
   * @return string the decoded string.
2874
   */
2875
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Nothing to decode in an empty string ...
    if ($str === '') {
      return '';
    }

    // ... or in a string without any ampersand.
    if (strpos($str, '&') === false) {
      return $str;
    }

    // Default flags: ENT_HTML5 is only added when Bootup reports PHP >= 5.4.
    if ($flags === null) {
      $flags = Bootup::is_php('5.4') === true ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Repeat until a pass leaves the string unchanged, so that nested
    // (multiply encoded) entities are resolved as well.
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", array('\voku\helper\UTF8', 'entityCallback'), $str);

      // Append the missing semicolon to numeric entities that lack one ...
      $terminated = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      // ... then decode numeric & UTF16 two byte entities
      $str = html_entity_decode($terminated, $flags, $encoding);
    } while ($before !== $str);

    return $str;
  }
2911
2912
  /**
2913
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2914
   *
2915
   * @link http://php.net/manual/en/function.htmlentities.php
2916
   *
2917
   * @param string $str           <p>
2918
   *                              The input string.
2919
   *                              </p>
2920
   * @param int    $flags         [optional] <p>
2921
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2922
   *                              invalid code unit sequences and the used document type. The default is
2923
   *                              ENT_COMPAT | ENT_HTML401.
2924
   *                              <table>
2925
   *                              Available <i>flags</i> constants
2926
   *                              <tr valign="top">
2927
   *                              <td>Constant Name</td>
2928
   *                              <td>Description</td>
2929
   *                              </tr>
2930
   *                              <tr valign="top">
2931
   *                              <td><b>ENT_COMPAT</b></td>
2932
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2933
   *                              </tr>
2934
   *                              <tr valign="top">
2935
   *                              <td><b>ENT_QUOTES</b></td>
2936
   *                              <td>Will convert both double and single quotes.</td>
2937
   *                              </tr>
2938
   *                              <tr valign="top">
2939
   *                              <td><b>ENT_NOQUOTES</b></td>
2940
   *                              <td>Will leave both double and single quotes unconverted.</td>
2941
   *                              </tr>
2942
   *                              <tr valign="top">
2943
   *                              <td><b>ENT_IGNORE</b></td>
2944
   *                              <td>
2945
   *                              Silently discard invalid code unit sequences instead of returning
2946
   *                              an empty string. Using this flag is discouraged as it
2947
   *                              may have security implications.
2948
   *                              </td>
2949
   *                              </tr>
2950 2
   *                              <tr valign="top">
2951
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2952 2
   *                              <td>
2953
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2954
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2955
   *                              </td>
2956
   *                              </tr>
2957
   *                              <tr valign="top">
2958
   *                              <td><b>ENT_DISALLOWED</b></td>
2959
   *                              <td>
2960
   *                              Replace invalid code points for the given document type with a
2961
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2962
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2963
   *                              instance, to ensure the well-formedness of XML documents with
2964
   *                              embedded external content.
2965
   *                              </td>
2966
   *                              </tr>
2967
   *                              <tr valign="top">
2968
   *                              <td><b>ENT_HTML401</b></td>
2969
   *                              <td>
2970
   *                              Handle code as HTML 4.01.
2971
   *                              </td>
2972
   *                              </tr>
2973
   *                              <tr valign="top">
2974
   *                              <td><b>ENT_XML1</b></td>
2975
   *                              <td>
2976
   *                              Handle code as XML 1.
2977
   *                              </td>
2978
   *                              </tr>
2979
   *                              <tr valign="top">
2980
   *                              <td><b>ENT_XHTML</b></td>
2981
   *                              <td>
2982
   *                              Handle code as XHTML.
2983
   *                              </td>
2984
   *                              </tr>
2985
   *                              <tr valign="top">
2986
   *                              <td><b>ENT_HTML5</b></td>
2987
   *                              <td>
2988
   *                              Handle code as HTML 5.
2989
   *                              </td>
2990
   *                              </tr>
2991
   *                              </table>
2992
   *                              </p>
2993
   * @param string $encoding      [optional] <p>
2994
   *                              Like <b>htmlspecialchars</b>,
2995
   *                              <b>htmlentities</b> takes an optional third argument
2996
   *                              <i>encoding</i> which defines encoding used in
2997
   *                              conversion.
2998
   *                              Although this argument is technically optional, you are highly
2999
   *                              encouraged to specify the correct value for your code.
3000
   *                              </p>
3001
   * @param bool   $double_encode [optional] <p>
3002
   *                              When <i>double_encode</i> is turned off PHP will not
3003
   *                              encode existing html entities. The default is to convert everything.
3004
   *                              </p>
3005
   *
3006
   *
3007
   * @return string the encoded string.
3008
   * </p>
3009
   * <p>
3010
   * If the input <i>string</i> contains an invalid code unit
3011
   * sequence within the given <i>encoding</i> an empty string
3012
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3013
   * <b>ENT_SUBSTITUTE</b> flags are set.
3014
   */
3015
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Thin wrapper: same call as the native function, but with "UTF-8"
    // as the default encoding.
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
3019
3020
  /**
3021
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
3022
   *
3023
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3024
   *
3025
   * @param string $str           <p>
3026
   *                              The string being converted.
3027
   *                              </p>
3028
   * @param int    $flags         [optional] <p>
3029
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3030
   *                              invalid code unit sequences and the used document type. The default is
3031
   *                              ENT_COMPAT | ENT_HTML401.
3032
   *                              <table>
3033
   *                              Available <i>flags</i> constants
3034
   *                              <tr valign="top">
3035
   *                              <td>Constant Name</td>
3036
   *                              <td>Description</td>
3037
   *                              </tr>
3038
   *                              <tr valign="top">
3039
   *                              <td><b>ENT_COMPAT</b></td>
3040
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3041
   *                              </tr>
3042
   *                              <tr valign="top">
3043
   *                              <td><b>ENT_QUOTES</b></td>
3044
   *                              <td>Will convert both double and single quotes.</td>
3045
   *                              </tr>
3046
   *                              <tr valign="top">
3047
   *                              <td><b>ENT_NOQUOTES</b></td>
3048
   *                              <td>Will leave both double and single quotes unconverted.</td>
3049
   *                              </tr>
3050
   *                              <tr valign="top">
3051
   *                              <td><b>ENT_IGNORE</b></td>
3052
   *                              <td>
3053
   *                              Silently discard invalid code unit sequences instead of returning
3054
   *                              an empty string. Using this flag is discouraged as it
3055
   *                              may have security implications.
3056
   *                              </td>
3057
   *                              </tr>
3058
   *                              <tr valign="top">
3059
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3060
   *                              <td>
3061
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3062 1
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
3063
   *                              </td>
3064 1
   *                              </tr>
3065
   *                              <tr valign="top">
3066
   *                              <td><b>ENT_DISALLOWED</b></td>
3067
   *                              <td>
3068
   *                              Replace invalid code points for the given document type with a
3069
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3070
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3071
   *                              instance, to ensure the well-formedness of XML documents with
3072 1
   *                              embedded external content.
3073
   *                              </td>
3074 1
   *                              </tr>
3075
   *                              <tr valign="top">
3076
   *                              <td><b>ENT_HTML401</b></td>
3077
   *                              <td>
3078
   *                              Handle code as HTML 4.01.
3079
   *                              </td>
3080
   *                              </tr>
3081
   *                              <tr valign="top">
3082
   *                              <td><b>ENT_XML1</b></td>
3083
   *                              <td>
3084
   *                              Handle code as XML 1.
3085
   *                              </td>
3086
   *                              </tr>
3087
   *                              <tr valign="top">
3088
   *                              <td><b>ENT_XHTML</b></td>
3089
   *                              <td>
3090
   *                              Handle code as XHTML.
3091
   *                              </td>
3092
   *                              </tr>
3093
   *                              <tr valign="top">
3094
   *                              <td><b>ENT_HTML5</b></td>
3095
   *                              <td>
3096
   *                              Handle code as HTML 5.
3097
   *                              </td>
3098
   *                              </tr>
3099
   *                              </table>
3100
   *                              </p>
3101
   * @param string $encoding      [optional] <p>
3102
   *                              Defines encoding used in conversion.
3103 1
   *                              </p>
3104
   *                              <p>
3105 1
   *                              For the purposes of this function, the encodings
3106
   *                              ISO-8859-1, ISO-8859-15,
3107
   *                              UTF-8, cp866,
3108
   *                              cp1251, cp1252, and
3109
   *                              KOI8-R are effectively equivalent, provided the
3110
   *                              <i>string</i> itself is valid for the encoding, as
3111
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3112
   *                              the same positions in all of these encodings.
3113
   *                              </p>
3114
   * @param bool   $double_encode [optional] <p>
3115 1
   *                              When <i>double_encode</i> is turned off PHP will not
3116
   *                              encode existing html entities, the default is to convert everything.
3117 1
   *                              </p>
3118
   *
3119
   * @return string <p>The converted string.
3120
   * </p>
3121
   * <p>
3122
   * If the input <i>string</i> contains an invalid code unit
3123
   * sequence within the given <i>encoding</i> an empty string
3124
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3125
   * <b>ENT_SUBSTITUTE</b> flags are set.
3126
   */
3127 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Plain pass-through to the native function; this signature defaults the
    // encoding argument to 'UTF-8'.
    $escaped = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $escaped;
  }
3131
3132
  /**
3133
   * checks whether iconv is available on the server
3134
   *
3135
   * @return   bool True if available, False otherwise
3136
   */
3137
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean; compare strictly instead of
    // routing it through a ternary.
    return extension_loaded('iconv') === true;
  }
3141
3142
  /**
3143
   * Converts Integer to hexadecimal U+xxxx code point representation.
3144
   *
3145
   * @param    int    $int The integer to be converted to hexadecimal code point.
3146
   * @param    string $pfix
3147
   *
3148
   * @return   string The code point, or empty string on failure.
3149
   */
3150
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything whose string form is not purely decimal digits is rejected.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad to at least four hex digits; longer values are left untouched.
    $hex = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $hex;
  }
3162
3163
  /**
3164
   * checks whether intl is available on the server
3165
   *
3166
   * @return   bool True if available, False otherwise
3167
   */
3168
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean; compare strictly instead of
    // routing it through a ternary.
    return extension_loaded('intl') === true;
  }
3172
3173
  /**
3174
   * checks whether intl-char is available on the server
3175
   *
3176
   * @return   bool True if available, False otherwise
3177
   */
3178
  public static function intlChar_loaded()
  {
    // Both conditions must hold: the Bootup version check for '7.0' and the
    // presence of the IntlChar class. "&&" replaces the low-precedence "and"
    // operator, which is easy to misread next to assignments.
    return Bootup::is_php('7.0') === true && class_exists('IntlChar');
  }
3182
3183
  /**
3184
   * alias for "UTF8::is_ascii()"
3185
   *
3186
   * @param string $str
3187
   *
3188
   * @return boolean
3189
   */
3190
  public static function isAscii($str)
  {
    // camelCase alias: forwards unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
3194 4
3195
  /**
3196
   * alias for "UTF8::is_base64"
3197
   *
3198
   * @param string $str
3199
   *
3200
   * @return bool
3201
   */
3202
  public static function isBase64($str)
  {
    // camelCase alias: forwards unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
3206 1
3207
  /**
3208 1
   * alias for "UTF8::is_bom"
3209 1
   *
3210
   * @param string $utf8_chr
3211
   *
3212 1
   * @return boolean
3213 1
   */
3214
  public static function isBom($utf8_chr)
  {
    // camelCase alias: forwards unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
3218
3219
  /**
3220
   * Try to check if a string is a json-string...
3221
   *
3222
   * @param $str
3223
   *
3224
   * @return bool
3225
   */
3226 4
  public static function isJson($str)
  {
    $str = (string)$str;

    // An empty string is never JSON.
    if (!isset($str[0])) {
      return false;
    }

    $decoded = json_decode($str);

    // Both JSON objects ("{...}") and JSON arrays ("[...]") are accepted;
    // previously only objects were, so valid documents such as "[1,2]" were
    // rejected. Bare scalars ("123", "true", "\"x\"") are still not treated as
    // JSON strings, as before.
    return (is_object($decoded) || is_array($decoded))
           &&
           json_last_error() === JSON_ERROR_NONE;
  }
3244
3245
  /**
3246
   * check if string contains any html-tags <lall>
3247
   *
3248
   * @param string $str
3249
   *
3250
   * @return boolean
3251
   */
3252
  public static function isHtml($str)
  {
    $str = (string)$str;

    // Nothing to inspect in an empty string.
    if (!isset($str[0])) {
      return false;
    }

    // Looks for one opening, closing or self-closing tag, optionally carrying
    // attributes; a single hit is enough.
    $found = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str, $found);

    return count($found) != 0;
  }
3271
3272
  /**
3273 2
   * alias for "UTF8::is_utf8"
3274
   *
3275 2
   * @param string $str
3276
   *
3277
   * @return bool
3278
   */
3279
  public static function isUtf8($str)
  {
    // camelCase alias: forwards unchanged to is_utf8().
    $result = self::is_utf8($str);

    return $result;
  }
3283
3284
  /**
3285 2
   * Checks if a string is 7 bit ASCII.
3286
   *
3287 2
   * @param    string $str The string to check.
3288 2
   *
3289
   * @return   bool <strong>true</strong> if it is ASCII<br />
3290 2
   *                <strong>false</strong> otherwise
3291 2
   */
3292 2
  public static function is_ascii($str)
  {
    // A single byte in the 0x80-0xFF range takes the string out of 7-bit ASCII.
    $hasHighByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighByte;
  }
3296 2
3297 2
  /**
3298 2
   * Returns true if the string is base64 encoded, false otherwise.
3299 1
   *
3300 1
   * @param string $str
3301 2
   *
3302 2
   * @return bool Whether or not $str is base64 encoded
3303 2
   */
3304
  public static function is_base64($str)
  {
    $str = (string)$str;

    // An empty string is not treated as base64.
    if (!isset($str[0])) {
      return false;
    }

    // Strict decode followed by re-encode must reproduce the input exactly.
    $decoded = base64_decode($str, true);

    return base64_encode($decoded) === $str;
  }
3318 2
3319
  /**
3320 2
   * Check if the input is binary... (this looks like a hack)
3321 1
   *
3322 1
   * @param string $input
3323
   *
3324 1
   * @return bool
3325
   */
3326
  public static function is_binary($input)
  {
    $byteCount = strlen($input);

    // A string made only of the characters "0" and "1" counts as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte counts as binary.
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    if (!$byteCount) {
      return false;
    }

    // NOTE(review): substr_count() counts occurrences of the literal 4-byte
    // string '^ -~', not a character class, so this ratio cannot exceed 0.25.
    // Presumably a "share of non-printable bytes" test was intended -- confirm
    // before changing, since is_utf16()/is_utf32() call this method.
    return substr_count($input, '^ -~') / $byteCount > 0.3;
  }
3343 2
3344
  /**
3345 2
   * Check if the file is binary.
3346 2
   *
3347 2
   * @param string $file
3348 2
   *
3349 2
   * @return boolean
3350 2
   */
3351 2
  public static function is_binary_file($file)
  {
    // Falls back to an empty sample whenever the file cannot be read.
    $block = '';
    $fp = false;

    try {
      // 'rb' reads the bytes untranslated on every platform.
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false (plus a warning), not by
      // throwing; passing that false on to fread() is a TypeError on PHP 8.
      if ($fp !== false) {
        $sample = fread($fp, 512);
        fclose($fp);
        $fp = false;

        if ($sample !== false) {
          $block = $sample;
        }
      }
    } catch (\Exception $e) {
      // Reached when an error handler converts the I/O warning into an
      // exception; release the handle if it was opened.
      if (is_resource($fp)) {
        fclose($fp);
      }
      $block = '';
    }

    // Only the first 512 bytes (at most) are inspected.
    return self::is_binary($block);
  }
3363 1
3364 1
  /**
3365 1
   * Checks if the given string is exactly "UTF8 - Byte Order Mark".
3366 1
   *
3367 1
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3368 1
   *
3369
   * @param    string $utf8_chr The input string.
3370
   *
3371 1
   * @return   bool True if the $utf8_chr is Byte Order Mark, False otherwise.
3372 1
   */
3373 1
  public static function is_bom($utf8_chr)
  {
    // Strict comparison against the value returned by bom(): the input must
    // be exactly that string, not merely start with it.
    $bom = self::bom();

    return $bom === $utf8_chr;
  }
3377
3378
  /**
3379
   * Check if the string is UTF-16.
3380
   *
3381
   * @param string $str
3382
   *
3383 2
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3384
   */
3385 2 View Code Duplication
  public static function is_utf16($str)
  {
    // Only input that is_binary() flags is examined at all.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // One score per byte order, evaluated little-endian first.
    $scores = array();

    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $scores[$encoding] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($asUtf8 === false || strlen($asUtf8) <= 1) {
        continue;
      }

      // The converted text must survive a round trip through the candidate encoding.
      $backToCandidate = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): in_array() searches the *values* of $strChars, while
      // $char is a *key* of the other count_chars() result -- confirm that
      // this comparison is what was intended.
      $strChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    // A tie (including 0:0) means "not detected".
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
3432 28
3433 28
  /**
3434 28
   * Check if the string is UTF-32.
3435 28
   *
3436 30
   * @param string $str
3437
   *
3438 13
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3439 13
   */
3440 13 View Code Duplication
  public static function is_utf32($str)
  {
    // Only input that is_binary() flags is examined at all.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // One score per byte order, evaluated little-endian first.
    $scores = array();

    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $scores[$encoding] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($asUtf8 === false || strlen($asUtf8) <= 1) {
        continue;
      }

      // The converted text must survive a round trip through the candidate encoding.
      $backToCandidate = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): in_array() searches the *values* of $strChars, while
      // $char is a *key* of the other count_chars() result -- confirm that
      // this comparison is what was intended.
      $strChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    // A tie (including 0:0) means "not detected".
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
3487
3488
  /**
3489
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3490
   *
3491
   * @see    http://hsivonen.iki.fi/php-utf8/
3492 28
   *
3493 28
   * @param    string $str The string to be checked.
3494 28
   *
3495 28
   * @return   bool
3496
   */
3497 28
  public static function is_utf8($str)
  {
    $str = (string)$str;

    // The empty string is reported as valid.
    if (!isset($str[0])) {
      return true;
    }

    // NOTE(review): this shortcut is taken when pcre_utf8_support() is NOT
    // true, yet it depends on the /u modifier -- the condition looks inverted.
    // Confirm against pcre_utf8_support() before changing it.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    // Lead-octet table: bit mask, expected pattern, payload mask, sequence length.
    // 5- and 6-octet forms are decoded only so that the end-of-sequence check
    // can reject them, instead of trying to resynchronize.
    $leadOctets = array(
        array(0xE0, 0xC0, 0x1F, 2),
        array(0xF0, 0xE0, 0x0F, 3),
        array(0xF8, 0xF0, 0x07, 4),
        array(0xFC, 0xF8, 0x03, 5),
        array(0xFE, 0xFC, 0x01, 6),
    );

    $pending = 0;   // continuation octets still expected for the current character
    $codePoint = 0; // code point assembled so far
    $seqLength = 1; // total number of octets in the current sequence
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
      $octet = ord($str[$i]);

      if ($pending === 0) {
        // US-ASCII passes straight through.
        if (0 === (0x80 & $octet)) {
          $seqLength = 1;
          continue;
        }

        $isLead = false;
        foreach ($leadOctets as $lead) {
          if ($lead[1] === ($lead[0] & $octet)) {
            $seqLength = $lead[3];
            $pending = $seqLength - 1;
            // The lead payload occupies the highest bits of the code point.
            $codePoint = ($octet & $lead[2]) << ($pending * 6);
            $isLead = true;
            break;
          }
        }

        // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
        if ($isLead === false) {
          return false;
        }

        continue;
      }

      // Inside a sequence only continuation octets (10xxxxxx) are legal.
      if (0x80 !== (0xC0 & $octet)) {
        return false;
      }

      $pending--;
      $codePoint |= ($octet & 0x0000003F) << ($pending * 6);

      if ($pending !== 0) {
        continue;
      }

      // Sequence complete: check for illegal sequences and code points.
      if (
          // From Unicode 3.1, non-shortest form is illegal
          (2 === $seqLength && $codePoint < 0x0080) ||
          (3 === $seqLength && $codePoint < 0x0800) ||
          (4 === $seqLength && $codePoint < 0x10000) ||
          (4 < $seqLength) ||
          // From Unicode 3.2, surrogate characters are illegal.
          (($codePoint & 0xFFFFF800) === 0xD800) ||
          // Code points outside the Unicode range are illegal.
          ($codePoint > 0x10FFFF)
      ) {
        return false;
      }

      // Reset for the next character.
      $codePoint = 0;
      $seqLength = 1;
    }

    // A string that stops in the middle of a multi-octet sequence (for example
    // a lone "\xC3") is invalid; this used to be reported as valid.
    return $pending === 0;
  }
3621
3622
  /**
3623
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3624
   * Decodes a JSON string
3625
   *
3626
   * @link http://php.net/manual/en/function.json-decode.php
3627 6
   *
3628
   * @param string $json    <p>
3629 6
   *                        The <i>json</i> string being decoded.
3630
   *                        </p>
3631
   *                        <p>
3632
   *                        This function only works with UTF-8 encoded strings.
3633
   *                        </p>
3634
   *                        <p>PHP implements a superset of
3635
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3636
   *                        only supports these values when they are nested inside an array or an object.
3637
   *                        </p>
3638
   * @param bool   $assoc   [optional] <p>
3639
   *                        When <b>TRUE</b>, returned objects will be converted into
3640
   *                        associative arrays.
3641
   *                        </p>
3642 24
   * @param int    $depth   [optional] <p>
3643
   *                        User specified recursion depth.
3644 24
   *                        </p>
3645
   * @param int    $options [optional] <p>
3646 24
   *                        Bitmask of JSON decode options. Currently only
3647 2
   *                        <b>JSON_BIGINT_AS_STRING</b>
3648
   *                        is supported (default is to cast large integers as floats)
3649
   *                        </p>
3650 23
   *
3651
   * @return mixed the value encoded in <i>json</i> in appropriate
3652 23
   * PHP type. Values true, false and
3653
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3654
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3655
   * <i>json</i> cannot be decoded or if the encoded
3656
   * data is deeper than the recursion limit.
3657
   */
3658
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through filter() before decoding.
    $filtered = self::filter($json);

    // $options is only forwarded to the native function when the Bootup
    // version check for '5.4' succeeds.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3670
3671
  /**
3672
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3673
   * Returns the JSON representation of a value
3674
   *
3675
   * @link http://php.net/manual/en/function.json-encode.php
3676
   *
3677
   * @param mixed $value   <p>
3678
   *                       The <i>value</i> being encoded. Can be any type except
3679 1
   *                       a resource.
3680
   *                       </p>
3681 1
   *                       <p>
3682 1
   *                       All string data must be UTF-8 encoded.
3683 1
   *                       </p>
3684
   *                       <p>PHP implements a superset of
3685 1
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3686
   *                       only supports these values when they are nested inside an array or an object.
3687
   *                       </p>
3688
   * @param int   $options [optional] <p>
3689
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3690
   *                       <b>JSON_HEX_TAG</b>,
3691
   *                       <b>JSON_HEX_AMP</b>,
3692
   *                       <b>JSON_HEX_APOS</b>,
3693
   *                       <b>JSON_NUMERIC_CHECK</b>,
3694 2
   *                       <b>JSON_PRETTY_PRINT</b>,
3695
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3696 2
   *                       <b>JSON_FORCE_OBJECT</b>,
3697
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3698 2
   *                       constants is described on
3699 2
   *                       the JSON constants page.
3700 2
   *                       </p>
3701
   * @param int   $depth   [optional] <p>
3702 2
   *                       Set the maximum depth. Must be greater than zero.
3703
   *                       </p>
3704
   *
3705
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3706
   */
3707
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is passed through filter() before encoding.
    $filtered = self::filter($value);

    // $depth is only forwarded to the native function when the Bootup
    // version check for '5.5' succeeds.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3719
3720
  /**
3721
   * Makes string's first char lowercase.
3722
   *
3723
   * @param    string $str The input string
3724
   *
3725
   * @return   string The resulting string
3726
   */
3727
  public static function lcfirst($str)
  {
    // Lower-case the first character only, then re-attach the untouched rest.
    $first = self::substr($str, 0, 1);
    $loweredFirst = self::strtolower($first);
    $rest = self::substr($str, 1);

    return $loweredFirst . $rest;
  }
3731
3732 13
  /**
3733
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3734
   *
3735 13
   * WARNING: This is much slower than "ltrim()" !!!!
3736 13
   *
3737 13
   * @param    string $str   The string to be trimmed
3738 13
   * @param    string $chars Optional characters to be stripped
3739 13
   *
3740 13
   * @return   string The string with unwanted characters stripped from the left
3741 13
   */
3742 13 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    // Nothing to trim.
    if (!isset($str[0])) {
      return '';
    }

    // INF is the "no character list given" marker: strip whitespace (\s).
    // Otherwise rxClass() supplies the pattern fragment for the given characters.
    if (INF === $chars) {
      $pattern = '\s';
    } else {
      $pattern = self::rxClass($chars);
    }

    return preg_replace('/^' . $pattern . '+/u', '', $str);
  }
3754 13
3755
  /**
3756
   * Returns the UTF-8 character with the maximum code point in the given data.
3757
   *
3758
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3759
   *
3760
   * @return   string The character with the highest code point.
3761
   */
3762 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is glued together and handled like one string.
    $input = is_array($arg) ? implode($arg) : $arg;

    // Largest entry of codepoints(), converted back through chr().
    $codePoints = self::codepoints($input);

    return self::chr(max($codePoints));
  }
3770 1
3771 1
  /**
3772 1
   * Calculates and returns the maximum number of bytes taken by any
3773
   * UTF-8 encoded character in the given string.
3774 2
   *
3775
   * @param    string $str The original Unicode string.
3776
   *
3777
   * @return   int The largest byte length of any character, or 0 if no character sizes are found.
3778
   */
3779
  public static function max_chr_width($str)
  {
    // chr_size_list() supplies the list of sizes; an empty list yields 0.
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3788 8
3789 8
  /**
3790
   * checks whether mbstring is available on the server
3791 8
   *
3792
   * @return   bool True if available, False otherwise
3793 8
   */
3794
  public static function mbstring_loaded()
  {
    $loaded = extension_loaded('mbstring');

    if ($loaded !== true) {
      return $loaded;
    }

    // Side effect: when the extension is present, the mbstring internal
    // encoding is switched to UTF-8 on every call.
    \mb_internal_encoding('UTF-8');

    return $loaded;
  }
3804
3805 8
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the lowest code point,
   *                  or an empty string if the input yields no code points.
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codepoints = self::codepoints($arg);

    // min() on an empty list emits a warning (PHP < 8) or throws a ValueError (PHP >= 8),
    // so empty input is answered explicitly.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3820
3821
  /**
   * Normalize the encoding-name input.
   *
   * The name is upper-cased and, once stripped of everything except letters,
   * digits and whitespace, looked up in a table of known aliases. Results are
   * memoized per original input for the lifetime of the process.
   *
   * @param string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.
   */
  public static function normalizeEncoding($encoding)
  {
    static $cache = array();

    // falsy input and names already listed in self::$iconvEncoding are returned untouched
    if (!$encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($cache[$encoding])) {
      return $cache[$encoding];
    }

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $normalized = strtoupper($encoding);
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $normalized);

    // unknown names fall through as their upper-cased form
    if (!empty($aliases[$aliasKey])) {
      $normalized = $aliases[$aliasKey];
    }

    $cache[$encoding] = $normalized;

    return $normalized;
  }
3873 8
3874
  /**
   * Normalize MS Word special characters.
   *
   * Every key of self::$utf8MSWord found in the string is replaced by its mapped value.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search/replace lists are derived from the mapping table once per process
    static $search = null;
    static $replacements = null;

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replacements = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replacements, $str);
  }
3893
3894
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable is replaced by a plain space; unless
   * requested otherwise, every entry of self::$bidiUniCodeControlsTable is removed first.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // Only a strict boolean true keeps the NBSP. The cache key is derived from that very
    // same test, otherwise a truthy non-boolean (e.g. 1) would store the full table under
    // the "keep" key and break every later call that passes true.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($whitespaces[$cacheKey])) {

      $whitespaces[$cacheKey] = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($whitespaces[$cacheKey]['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($whitespaces[$cacheKey]);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3932 1
3933
  /**
   * Format a number with grouped thousands.
   *
   * When both separators are longer than one byte, the number is formatted with the
   * ASCII defaults first and the separators are swapped in afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // NOTE(review): this workaround is only taken when Bootup::is_php('5.4') is true;
    // if that means "PHP >= 5.4" the check looks inverted - confirm against Bootup.
    if (
        isset($thousands_sep[1], $dec_point[1])
        &&
        Bootup::is_php('5.4') === true
    ) {
      // strtr() with an array substitutes both placeholders in a single pass, so a ','
      // inside $dec_point is never mistaken for the thousands placeholder
      // (two consecutive str_replace() passes would rewrite it).
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3968
3969
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * \IntlChar::ord() is used when the "intlChar" support flag is set and it yields a
   * truthy result; otherwise the first (up to four) bytes are decoded by hand.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for empty input; if no multi-byte form matches, the value of the first byte.
   */
  public static function ord($s)
  {
    // the string '0' is falsy in PHP but is a real character
    if (!$s && $s !== '0') {
      return 0;
    }

    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $codePoint = \IntlChar::ord($s);
      if ($codePoint) {
        return $codePoint;
      }
    }

    // unpack() yields a 1-based list of byte values
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // four-byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three-byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two-byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    return $lead;
  }
4010 1
4011 3
  /**
   * Parses the string into variables.
   *
   * WARNING: Unlike a one-argument parse_str() call, nothing is placed in the
   *    local scope; the parsed variables are written to $result (by reference).
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string; it is passed through self::filter() before parsing.
   *                        </p>
   * @param array  $result  <p>
   *                        Receives the parsed variables as array elements.
   *                        </p>
   *
   * @return void
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    \mb_parse_str(self::filter($str), $result);
  }
4038
4039
  /**
   * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // an empty UTF-8 pattern against an empty subject; errors are silenced on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
4049
4050
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each bound is interpreted in this order: decimal digits, hexadecimal digits,
   * otherwise a UTF-8 character whose code point is taken via self::ord().
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters of the range; empty if a bound is falsy or resolves to 0.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve the first bound
    if (ctype_digit((string)$var1)) {
      $from = (int)$var1;
    } elseif (ctype_xdigit($var1)) {
      $from = (int)self::hex_to_int($var1);
    } else {
      $from = self::ord($var1);
    }

    if (!$from) {
      return array();
    }

    // resolve the second bound
    if (ctype_digit((string)$var2)) {
      $to = (int)$var2;
    } elseif (ctype_xdigit($var2)) {
      $to = (int)self::hex_to_int($var2);
    } else {
      $to = self::ord($var2);
    }

    if (!$to) {
      return array();
    }

    $chars = array();
    foreach (range($from, $to) as $codePoint) {
      $chars[] = self::chr($codePoint);
    }

    return $chars;
  }
4096
4097
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Besides the raw byte order marks, the byte sequences that result from a BOM being
   * mis-decoded as WINDOWS-1252 and re-encoded as UTF-8 are stripped as well.
   * At most one mark is removed from the start of the string.
   *
   * @param string $str
   *
   * @return string The string without a leading BOM.
   */
  public static function removeBOM($str = '')
  {
    // INFO: https://en.wikipedia.org/wiki/Byte_order_mark
    //
    // All marks are spelled as explicit byte escapes: a literal non-ASCII (or invisible)
    // needle depends on the encoding of this source file, and an empty needle would match
    // at offset 0 of every string on PHP 8.
    //
    // Order matters: the UTF-32 (LE) mark starts with the UTF-16 (LE) mark,
    // so the longer marks are tested first.
    static $boms = array(
        "\xef\xbb\xbf",             // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf", // UTF-8 BOM as "WINDOWS-1252"
        "\x00\x00\xfe\xff",         // UTF-32 (BE) BOM
        "\xff\xfe\x00\x00",         // UTF-32 (LE) BOM
        "\xfe\xff",                 // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf",         // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe",                 // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe",         // UTF-16 (LE) BOM as "WINDOWS-1252"
    );

    foreach ($boms as $bom) {
      if (0 === strpos($str, $bom)) {
        // cast: substr() returns false on PHP < 8 when the string consists of the BOM only
        return (string)substr($str, strlen($bom));
      }
    }

    return $str;
  }
4128
4129 40
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of one or more consecutive repetitions of a search string is
   * collapsed into a single occurrence.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    // anything that is neither a string nor an array leaves the input untouched
    if (!is_array($what)) {
      return $str;
    }

    foreach ($what as $needle) {
      $pattern = '/(' . preg_quote($needle, '/') . ')+/';
      $str = preg_replace($pattern, $needle, $str);
    }

    return $str;
  }
4151 16
4152
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the URL-encoded ("%0b", "%1F", ...) forms.
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding hex digits are case-insensitive, hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until a pass removes nothing: a removal can join the remains into a new match
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4185
4186
  /**
   * Replace the diamond question mark (�) with the given substitute.
   *
   * @param string $str
   * @param string $unknown The substitute; defaults to "?".
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the character is listed both as byte escapes and as a literal
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );

    $replacements = array(
        $unknown,
        $unknown,
    );

    return str_replace($search, $replacements, $str);
  }
4208 24
4209 24
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * WARNING: This is much slower then "rtrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $chars = INF === $chars ? '\s' : self::rxClass($chars);

    // "\z" anchors at the very end of the subject. A plain "$" also matches right before
    // a final newline, so custom characters in front of a trailing "\n" would be stripped
    // although they are not at the end of the string.
    return preg_replace("/{$chars}+\\z/u", '', $str);
  }
4231
4232
  /**
   * Builds a regex fragment that matches any single character of the given string.
   *
   * The result is a character class "[...]"; parts that do not fit into the class are
   * added as alternatives inside a non-capturing group. Results are memoized.
   *
   * @param string $s     The characters to match.
   * @param string $class Initial content of the character class.
   *
   * @return string
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    // the separator keeps e.g. ('a^', '') and ('a', '^') from sharing one cache entry
    $cacheKey = $s . "\0" . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    $class = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen goes first so that it is never read as a range operator
        $class[0] = '-' . $class[0];
      } elseif (!isset($char[2])) {
        // at most two bytes: may be a regex meta character, so quote it
        $class[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $class[0] .= $char;
      } else {
        $class[] = $char;
      }
    }

    $class[0] = '[' . $class[0] . ']';

    if (1 === count($class)) {
      $return = $class[0];
    } else {
      $return = '(?:' . implode('|', $class) . ')';
    }

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
4277 3
4278
  /**
   * Echo native UTF8-Support libs, e.g. for debugging.
   *
   * Prints one "name - value" line per entry of the support table.
   */
  public static function showSupport()
  {
    // init: make sure the support table is populated before it is read
    self::checkForSupport();

    foreach (self::$support as $utf8SupportName => $utf8Support) {
      // var_export() keeps booleans readable; a bare echo prints "1" or nothing
      echo $utf8SupportName . ' - ' . var_export($utf8Support, true) . "\n<br>";
    }
  }
4287 2
4288 1
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   *
   * @return   string The HTML numbered entity, or an empty string for empty input.
   */
  public static function single_chr_html_encode($chr)
  {
    // the string '0' is falsy in PHP but is a real character (same guard as in ord())
    if (!$chr && $chr !== '0') {
      return '';
    }

    return '&#' . self::ord($chr) . ';';
  }
4303
4304
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split by a regex; otherwise the bytes are
   * walked by hand and only well-formed 1- to 4-byte sequences are collected.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied in the PCRE branch).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: inspect the lead byte, then verify the continuation bytes

      $byteCount = strlen($str);
      $i = 0;

      while ($i < $byteCount) {
        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {
          // single-byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$i + 1])) {
          // two-byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];

            $i++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$i + 2])) {
          // three-byte sequence
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$i + 3])) {
          // four-byte sequence
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80") && (($str[$i + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }
        }

        $i++;
      }
    }

    if ($length > 1) {
      // glue every $length characters into one element
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4382
4383 1
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally an "iconv()" round-trip per known encoding.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each detector is run once and its result reused for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
        'UTF-8',
    );

    self::checkForSupport();

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // an encoding is accepted when converting the string to itself leaves it unchanged

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@iconv($encodingTmp, $encodingTmp, $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4460
4461
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s); taken literally, "$1" or "\1"
   *                       are not treated as back-references.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed a string or an array of replacements.
   * @since 5.0
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $search = (array)$search;

    /** @noinspection AlterInForeachInspection */
    foreach ($search as &$s) {
      if ('' === $s .= '') {
        // an empty needle must never match: this pattern cannot match anything
        $s = '/^(?<=.)$/';
      } else {
        $s = '/' . preg_quote($s, '/') . '/ui';
      }
    }
    unset($s); // break the reference left behind by the loop

    // preg_replace() interprets "$n" and "\n" in the replacement as back-references;
    // escape both characters so the replacement is inserted verbatim, like str_ireplace() does.
    if (is_array($replace)) {
      foreach ($replace as &$r) {
        $r = addcslashes((string)$r, '\\$');
      }
      unset($r);
    } else {
      $replace = addcslashes((string)$replace, '\\$');
    }

    $subject = preg_replace($search, $replace, $subject, -1, $replacements);
    $count = $replacements;

    return $subject;
  }
4505
4506
  /**
   * Limit the number of characters in a string without cutting a word in half.
   *
   * Strings that already fit are returned unchanged. Otherwise the string is cut at
   * $length characters, the trailing (partial) word is dropped and $strAddOn is appended.
   *
   * @param  string $str
   * @param  int    $length
   * @param  string $strAddOn
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the cut falls right behind a word: drop the space and append the add-on
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = self::substr($str, 0, $length);

    // remove the last, possibly incomplete word
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord === '') {
      // the limit falls inside the very first word: cut it hard
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4546 1
4547 1
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when $pad_length is not a positive integer,
   * is smaller than the input length, or when $pad_string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (is_int($pad_length) && ($pad_length > 0) && ($pad_length >= $input_length)) {
      $ps_length = self::strlen($pad_string);

      // nothing to pad with; also avoids a division by zero below
      if (!$ps_length) {
        return $input;
      }

      $diff = $pad_length - $input_length;

      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // the left side gets the smaller half, the right side the bigger one
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          // divide first, then convert: "(int)$diff / 2" casts before dividing
          // and yields a float for odd values
          $pre = self::substr($pre, 0, (int)floor($diff / 2));
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $post = self::substr($post, 0, (int)ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $input . $post;
    }

    return $input;
  }
4592
4593
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return str_repeat(self::filter($input), $multiplier);
  }
4617
4618 1
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: this is only a thin wrapper around the native "str_replace()", which is already UTF-8 safe.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  The value being searched for (the needle); an array designates multiple needles.
   * @param mixed $replace The replacement value; an array designates multiple replacements.
   * @param mixed $subject The string or array being searched and replaced on (the haystack).
   *                       If it is an array, every entry is processed and an array is returned.
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed A string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // $count is forwarded by reference, so the caller receives the number of replacements
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4652
4653
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via self::split(), the resulting pieces are shuffled
   * and then glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4668 7
4669
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicate values: keys of an array are unique
      $flipped = array_flip($codePoints);
      $codePoints = array_flip($flipped);
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4694 1
4695 1
  /**
   * Convert a string to an array.
   *
   * The string is first cut into grapheme clusters (via "grapheme_extract()" when intl support
   * is flagged, otherwise via the grapheme-cluster regex); for $len > 1 consecutive clusters
   * are then joined into chunks of $len clusters each.
   *
   * @param string $str
   * @param int    $len Chunk length; values below 1 are handed to the native "str_split()".
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    if ($len < 1) {
      // delegate to the native function so that it deals with the invalid length
      return str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $bytePos = 0;
      $byteLength = strlen($str);
      while ($bytePos < $byteLength) {
        // the last argument is filled by reference with the next start position
        $graphemes[] = \grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $bytePos, $bytePos);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $graphemes = $matches[0];
    }

    if ($len === 1) {
      return $graphemes;
    }

    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        // every $len-th grapheme opens a new chunk
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
4743
4744 7
  /**
   * Get a binary representation of a specific character.
   *
   * Every byte of the input is passed to self::ord() and the result is formatted
   * as a zero-padded binary number of at least 8 digits; the pieces are concatenated.
   *
   * @param   string $str The input character.
   *
   * @return  string The concatenated bit string, or an empty string for empty input.
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $bits = '';

    // the native str_split() without a length yields the single bytes of the string
    foreach (str_split($str) as $byte) {
      $bits .= vsprintf('%08b', (array)self::ord($byte));
    }

    return $bits;
  }
4770
4771
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   * Be aware it works on an array holding one entry per character of the (cleaned) input
   * string, so memory use will increase with the length of the input.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @author <[email protected]>
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    // lookup table shared between calls: first index is ($ord >> 8), second index is ($ord & 255)
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // forget the code point of the previous character, otherwise the "!isset($ord)"
      // guard below could let a stale value through
      $ord = null;

      $ordC1 = ord($c[1]);

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      // 3-byte sequence and longer
      if ($ordC0 >= 224) {
        $ordC2 = ord($c[2]);

        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = ord($c[3]);

          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = ord($c[4]);

            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = ord($c[5]);

              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 0xFE / 0xFF can never start a character
      if ($ordC0 >= 254 && $ordC0 <= 255) {
        $c = $unknown;
        continue;
      }

      // lead byte that matched none of the ranges above (e.g. a lone continuation byte)
      if (!isset($ord)) {
        $c = $unknown;
        continue;
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the data file presumably fills $UTF8_TO_ASCII[$bank] in this scope - confirm
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
4881
4882
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split with a regex that captures the words as delimiters, so the
   * resulting list alternates between non-word parts (even indexes) and words (odd indexes).
   *
   * @param string $str    The input string.
   * @param int    $format <strong>0</strong> => return a number of words<br />
   *                       <strong>1</strong> => return an array of words
   *                       <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist
   *
   * @return array|float The number of words in the string
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $len = count($strParts);

    if ($format === 1) {
      // plain list of words
      $words = array();
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      self::checkForSupport();

      // words keyed by their character offset inside the string
      $wordsByOffset = array();
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $wordsByOffset[$offset] = $strParts[$i];
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

      return $wordsByOffset;
    }

    // every word is surrounded by two non-word parts
    return ($len - 1) / 2;
  }
4926
4927 8
  /**
   * Case-insensitive string comparison.
   *
   * Both strings are case-folded via self::strtocasefold() and then compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4939 1
4940 1
  /**
   * String comparison.
   *
   * Identical strings short-circuit to 0; otherwise both strings are normalized
   * to NFD and compared with the native "strcmp()".
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4957
4958
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str
   * @param string $charList Characters that end the segment; an empty list yields null.
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only cut the string when the caller asked for a sub-range
    $wholeString = !$offset && 2147483647 === $length;
    if ($wholeString) {
      $str = (string)$str;
    } else {
      $str = (string)self::substr($str, $offset, $length);
    }

    // lazily capture everything in front of the first character from the mask
    $found = preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches);

    /** @noinspection OffsetOperationsInspection */
    return self::strlen($found ? $matches[1] : $str);
  }
4987 66
4988
  /**
   * Makes a UTF-8 string from code points.
   *
   * Each entry is converted with the class' own "chr()" method and the results are concatenated.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return implode('', array_map($toChar, $array));
  }
5007
5008
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * Only the first three bytes of the string are handed to self::is_bom().
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $leadingBytes = substr($str, 0, 3);

    return self::is_bom($leadingBytes);
  }
5019 1
5020
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The string is run through self::clean() before the native "strip_tags()" is applied.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            The input string.
   * @param string $allowable_tags [optional] Tags which should not be stripped.
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 first, then strip
    return strip_tags(self::clean($str), $allowable_tags);
  }
5046
5047
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  The string from which to get the position of the first occurrence of needle.
   * @param string  $needle    The string to find in haystack.
   * @param int     $offset    [optional] The position in haystack to start searching;
   *                           null (the default) is treated as 0.
   * @param string  $encoding  Encoding handed to "mb_stripos()"; a boolean falls back to "UTF-8".
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string,
   *                   or false if needle is not found or one of the strings is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    // cast like strpos() does: the default null must not reach the int-typed parameter
    return \mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
5094
5095
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * The search is delegated to "mb_stristr()" with the encoding fixed to "UTF-8".
   *
   * @param string $str
   * @param string $needle        An empty needle yields false.
   * @param bool   $before_needle
   *
   * @return false|string
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = (string)$needle;
    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    $result = \mb_stristr($str, $needle, $before_needle, 'UTF-8');

    return $result;
  }
5115
5116
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Set the charset for e.g. "\mb_" function
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int the number of characters in
   *           string str having character encoding
   *           encoding. A multi-byte character is
   *           counted as 1.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    $encoding = self::normalizeEncoding($encoding);

    // for these encodings the byte length is returned directly
    // (loose comparison on purpose, exactly like a "switch" would compare)
    if (in_array($encoding, array('ASCII', 'CP850'))) {
      return strlen($str);
    }

    self::checkForSupport();

    $mustClean = $encoding === 'UTF-8' && $cleanUtf8 === true;
    if ($mustClean) {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5159
5160
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are case-folded via self::strtocasefold() and then compared with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>< 0</strong> if str1 is less than str2<br />
   *             <strong>> 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5174
5175
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Identical strings short-circuit to 0; otherwise both strings are passed through
   * self::strtonatfold() and compared with the native "strnatcmp()".
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 The first string.
   * @param string $str2 The second string.
   *
   * @return int Similar to other string comparison functions, this one returns &lt; 0 if
   * str1 is less than str2; &gt; 0 if str1 is greater than str2, and 0 if they are equal.
   * @since 4.0
   * @since 5.0
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5198
5199
  /**
   * Case-insensitive string comparison of the first n characters
   *
   * Both strings are case-folded via self::strtocasefold() and then compared with self::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 The first string.
   * @param string $str2 The second string.
   * @param int    $len  The length of strings to be used in the comparison.
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i> is
   * greater than <i>str2</i>, and 0 if they are equal.
   * @since 4.0.4
   * @since 5.0
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5224
5225
  /**
   * String comparison of the first n characters
   *
   * Both strings are cut to their first $len characters via self::substr()
   * and the two pieces are compared with self::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 The first string.
   * @param string $str2 The second string.
   * @param int    $len  Number of characters to use in the comparison.
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i>
   * is greater than <i>str2</i>, and 0 if they are
   * equal.
   * @since 4.0
   * @since 5.0
   */
  public static function strncmp($str1, $str2, $len)
  {
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5251 1
5252
  /**
   * Search a string for any of a set of characters
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  The string where char_list is looked for.
   * @param string $char_list This parameter is case sensitive.
   *
   * @return string a string starting from the character found, or false if it is
   * not found (also when one of the arguments is empty).
   * @since 5.0
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // cut the haystack at the byte position of the matched character
    $bytePos = strpos($haystack, $m[0]);

    return substr($haystack, $bytePos);
  }
5283
5284
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  The string being checked.
   * @param string|int $needle    The string to find in haystack.
   *                              A non-negative integer is treated as a code point and
   *                              converted to its character via self::chr().
   * @param int        $offset    [optional] The search offset. If it is not specified, 0 is used.
   * @param string     $encoding  Encoding handed to "mb_strpos()"; a boolean falls back to "UTF-8".
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or one of the strings is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support integer $needle, so a code point is converted to its
    // character here. This has to happen *before* the string cast: once $needle is a string
    // the strict integer comparison can never be true.
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      // NOTE(review): this branch is guarded by the "iconv" flag but calls grapheme_strpos() - confirm intended
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // last resort: byte-wise search, the byte position is translated into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5360
5361
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name to use; defaults to "UTF-8".
   *
   * @return string the portion of haystack.
   * or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = \mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5395
5396 10
  /**
   * Reverses characters order in the string.
   *
   * The string is split via self::split(), the pieces are reversed and joined again.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5407
5408 10
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name to use; defaults to "UTF-8".
   *
   * @return string the portion of haystack.
   * or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = \mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5442
5443
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Both strings are lower-cased via self::strtolower() and then searched with self::strrpos().
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int The position of offset
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
5456
5457
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  The string being searched.
   * @param string|int $needle    The string to find in $haystack; a non-negative integer is converted
   *                              with self::chr() first.
   * @param int        $offset    [optional] Cast to int (null becomes 0) and forwarded to the backend. In the
   *                              pure-PHP fallback a positive value skips that many characters at the start and
   *                              a negative value cuts that many characters off the end before searching.
   * @param boolean    $cleanUtf8 If true, $needle and $haystack are passed through self::clean() first.
   *
   * @return int|false The position of the last occurrence of $needle, or false when it is not found
   *                   or when $haystack / $needle is empty.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // a non-negative integer needle is turned into a character via self::chr()
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    // nothing can be found in / for an empty string
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): this branch is guarded by the 'iconv' flag but calls a grapheme_* function -- confirm intent.
    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: narrow the haystack according to the offset, then search byte-wise
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);

    if ($bytePos === false) {
      return false;
    }

    // translate the byte position into a character position by measuring what precedes the match
    $before = substr($haystack, 0, $bytePos);
    $skipped = $offset > 0 ? $offset : 0;

    return $skipped + self::strlen($before);
  }
5532
5533 1
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    The string to examine
   * @param string $mask   The allowed characters; turned into a regex character class by self::rxClass()
   * @param int    $offset Start position, applied via self::substr() when non-zero
   * @param int    $length Length of the examined window; 2147483647 (the default) means "no limit"
   *
   * @return int Length of the leading segment as reported by self::strlen(), 0 if there is none
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // only cut the string when a non-default window was requested
    if ($offset || 2147483647 !== $length) {
      $str = self::substr($str, $offset, $length);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5552
5553
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Thin wrapper around \grapheme_strstr() that first runs self::checkForSupport().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      The input string. Must be valid UTF-8.
   * @param string $needle        The string to look for. Must be valid UTF-8.
   * @param bool   $before_needle [optional] If <b>TRUE</b>, the part of the haystack before the first
   *                              occurrence of the needle (excluding the needle) is returned instead.
   *
   * @return string|false the portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    self::checkForSupport();

    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5577 22
5578 22
  /**
   * Unicode transformation for case-less matching.
   *
   * Applies the replacements from self::$commonCaseFold, optionally the 'caseFolding_full' data set,
   * then runs the result through self::clean() and self::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str  The string to fold
   * @param bool   $full Whether the 'caseFolding_full' replacements are applied as well
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup tables are built / loaded once and kept for subsequent calls
    static $commonSearch = null;
    static $commonReplace = null;
    static $fullTable = null;

    if ($commonSearch === null) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonSearch, $commonReplace, $str);

    if ($full) {
      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search strings, index 1 the replacements
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    $folded = self::clean($folded);

    return self::strtolower($folded);
  }
5615
5616
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      The string being lowercased.
   * @param string $encoding Encoding name handed to \mb_strtolower(), 'UTF-8' by default
   *
   * @return string str with all alphabetic characters converted to lowercase; '' for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // empty input: nothing to convert, skip the support check entirely
    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    $lowered = \mb_strtolower($str, $encoding);

    return $lowered;
  }
5642
5643
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The input is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed.
   *
   * @param string $s
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5654
5655
  /**
   * Make a string uppercase.
   *
   * Uses \mb_strtoupper() when mbstring is flagged as available; otherwise the string is cleaned and
   * converted with the replacement table returned by self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      The string being uppercased.
   * @param string $encoding Encoding name handed to \mb_strtoupper(), 'UTF-8' by default
   *
   * @return string str with all alphabetic characters converted to uppercase; '' for empty input.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // fallback lookup table, built on first use and kept between calls
    static $caseSearch = null;
    static $caseReplace = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, $encoding);
    }

    // fallback
    if ($caseSearch === null) {
      $caseTable = self::case_table();
      $caseSearch = array_keys($caseTable);
      $caseReplace = array_values($caseTable);
    }

    $cleaned = self::clean($str);

    return str_replace($caseSearch, $caseReplace, $cleaned);
  }
5698
5699
  /**
   * Translate characters or replace sub-strings.
   *
   * Two calling conventions are supported:
   * - strtr($str, $pairs): $to is left at its INF sentinel and $from is handed to PHP's strtr() as-is.
   * - strtr($str, $from, $to): $from and $to are turned into parallel lists (strings are split with
   *   self::str_split(), arrays are used as given), the longer list is truncated to the length of the
   *   shorter one, and the resulting from => to map is applied with PHP's strtr().
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  The string being translated.
   * @param string|array $from The characters / strings to replace, or a replace-pairs array when $to is omitted.
   * @param string|array $to   The replacements; INF (the default) means "not given".
   *
   * @return string A copy of $str with every occurrence of each entry of $from replaced by the
   *                corresponding entry of $to.
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // only strings need splitting; an array already is a list of items
      if (!is_array($from)) {
        $from = self::str_split($from);
      }

      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // array_combine() needs both lists to have the same size
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    return strtr($str, $from);
  }
5740 1
5741
  /**
   * Return the width of a string, as computed by \mb_strwidth() for UTF-8.
   *
   * @param string $s
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5755 1
5756 1
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       The string to extract from.
   * @param int     $start     The first position used in str.
   * @param int     $length    [optional] The maximum length of the returned string; null means
   *                           "the length of $str as reported by self::strlen()".
   * @param string  $encoding  Encoding name handed to \mb_substr(); a boolean is replaced by 'UTF-8'
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string The portion of str specified by the start and length parameters; '' for empty input.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    $length = ($length === null) ? (int)self::strlen($str) : (int)$length;

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if (is_bool($encoding)) {
        $encoding = 'UTF-8';
      }

      return \mb_substr($str, $start, $length, $encoding);
    }

    // NOTE(review): this branch is guarded by the 'iconv' flag but calls a grapheme_* function -- confirm intent.
    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback: split into pieces, take the requested slice and glue it back together
    $pieces = self::split($str);
    $slice = array_slice($pieces, $start, $length);

    return implode($slice);
  }
5822 6
5823
  /**
   * Comparison of two strings from an offset, up to length characters.
   *
   * The window of $main_str is extracted with self::substr(), $str is shortened to the length of that
   * window, and the two pieces are compared with self::strcasecmp() or self::strcmp().
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison, forwarded to self::substr().
   * @param int     $length             The length of the comparison, forwarded to self::substr().
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5843
5844
  /**
   * Count the number of substring occurrences
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack The string to search in
   * @param string $needle   The substring to search for
   * @param int    $offset   [optional] The offset where to start counting
   * @param int    $length   [optional] The maximum length after the specified offset to search for the
   *                         substring; null (the default) means "up to the end of the haystack".
   *
   * @return int Number of occurrences; 0 when $haystack or $needle is empty.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return 0;
    }

    // Restrict the haystack to the requested window. The length is tested against null explicitly:
    // an omitted length must not be turned into 0 (that would empty the haystack whenever only an
    // offset is given), and an explicit length of 0 must not be mistaken for "not given".
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      if ($length !== null) {
        $length = (int)$length;
      }

      // self::substr() treats a null length as "to the end of the string"
      $haystack = self::substr($haystack, $offset, $length);
    }

    self::checkForSupport();

    return \mb_substr_count($haystack, $needle);
  }
5888
5889
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * When $str is an array, $replacement, $start and $length are expanded / truncated to one entry per
   * element of $str and the method is applied to every element in turn. Otherwise the string and the
   * replacement are split into characters with a /u regex and spliced with array_splice().
   *
   * @param string|array   $str         The input string, or an array of input strings
   * @param string|array   $replacement The replacement; for a scalar $str only the first array entry is used
   * @param int|array      $start       Start position(s), with array_splice() semantics
   * @param null|int|array $length      Number of characters to replace; for a scalar $str null means
   *                                    "the whole length of $str"
   *
   * @return array|string An array when $str is an array, a string otherwise
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement: one entry per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: non-integer entries are replaced by 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: unset entries become 0, non-integer entries become $num
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, element by element
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // scalar $str: an array replacement collapses to its first entry (or '' when empty)
    if (is_array($replacement)) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // split both strings into characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      self::checkForSupport();

      $length = \mb_strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // glue first, pieces second: the reversed legacy argument order is rejected by PHP 8
    return implode('', $smatches[0]);
  }
5966 5
5967
  /**
   * Returns a case swapped version of the string.
   *
   * @param string $str      The input string
   * @param string $encoding Encoding name handed to UTF8::strtoupper() / UTF8::strtolower()
   *
   * @return string each character's case swapped; '' for empty input
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $str = self::clean($str);

    // visit every non-whitespace character on its own
    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          // unchanged by upper-casing => lower-case it, otherwise take the upper-case form
          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6001
6002
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Substitution character, forwarded unchanged
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
6014
6015
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @param string|array $str Forwarded unchanged
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
6026 20
6027 20
  /**
   * alias for "UTF8::to_utf8"
   *
   * @param string|array $str Forwarded unchanged
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
6038
6039
  /**
   * convert to ASCII
   *
   * The input is cleaned with self::clean(). If it contains any byte >= 0x80 it is normalized to NFKC and
   * every multi-byte character is transliterated on its own: first with iconv(), then via the
   * 'translit_extra' data set or an NFD decomposition, and finally via self::str_transliterate().
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Substitute used when the NFD decomposition does not start with an ASCII byte;
   *                          also forwarded to self::str_transliterate()
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    // 'translit_extra' data, loaded on first use and kept between calls
    static $translitExtra = null;

    $s = (string)$s;

    if ('' === $s) {
      return '';
    }

    $s = self::clean($s);

    // no byte >= 0x80: nothing to transliterate
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = \Normalizer::normalize($s, \Normalizer::NFKC);

    $isGlibc = 'glibc' === ICONV_IMPL;

    preg_match_all('/./u', $s, $chars);

    foreach ($chars[0] as $index => $c) {

      // single-byte characters are kept as they are
      if (!isset($c[1])) {
        continue;
      }

      if ($isGlibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if ($t !== false && is_string($t)) {
          if (!isset($t[0])) {
            // empty result is treated like an untranslatable character
            $t = '?';
          } elseif (isset($t[1])) {
            // drop leading accent characters from a multi-character result
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          // keep the first byte of the decomposed form if it is ASCII, otherwise substitute
          $t = \Normalizer::normalize($c, \Normalizer::NFD);
          $t = ($t[0] < "\x80") ? $t[0] : $subst_chr;
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $chars[0][$index] = $t;
    }

    return implode('', $chars[0]);
  }
6118
6119
  /**
   * alias for "UTF8::to_win1252()"
   *
   * @param   string|array $str Forwarded unchanged
   *
   * @return  array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6130
6131 7
  /**
   * alias for "UTF8::to_win1252()"
   *
   * @param string|array $str Forwarded unchanged
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6142
6143 1
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte-level pass, "\uXXXX" escape sequences and "&#NN;" numeric entities (2 to 4 digits)
   * found in the result are decoded as well.
   *
   * @param string|array $str Any string or array; arrays are converted element by element.
   *
   * @return string|array The same string (or array of strings), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 0x00-0x7F: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      // 0x80-0xBF outside of a sequence: needs conversion
      if ($ordC1 < 0xc0) {
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= chr(0xc0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3f));
        }
        continue;
      }

      // 0xC0-0xFF: should be converted to UTF8, if it's not UTF8 already.
      // Number of continuation bytes a sequence starting with this byte would need:
      if ($ordC1 <= 0xdf) {
        $tailBytes = 1; // looks like 2 bytes UTF8
      } elseif ($ordC1 <= 0xef) {
        $tailBytes = 2; // looks like 3 bytes UTF8
      } elseif ($ordC1 <= 0xf7) {
        $tailBytes = 3; // looks like 4 bytes UTF8
      } else {
        $tailBytes = 0; // doesn't look like UTF8, but should be converted
      }

      // every expected continuation byte must exist and lie in 0x80-0xBF
      $isSequence = $tailBytes > 0 && ($i + $tailBytes) < $max;
      for ($k = 1; $isSequence && $k <= $tailBytes; $k++) {
        $tail = $str[$i + $k];
        $isSequence = $tail >= "\x80" && $tail <= "\xbf";
      }

      if ($isSequence) { // yeah, almost sure it's UTF8 already
        $buf .= substr($str, $i, $tailBytes + 1);
        $i += $tailBytes;
      } else { // not valid UTF8 - convert the single byte into a 2 byte sequence
        $buf .= chr(0xc0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3f));
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6270
6271
  /**
   * Convert a string - or, recursively, every element of an array - into
   * "win1252"-encoding by passing it through "UTF8::utf8_decode()".
   *
   * @param  string|array $str The value(s) to convert; array keys are preserved.
   *
   * @return string|array The converted string, or an array of converted values.
   */
  protected static function to_win1252($str)
  {
    if (is_array($str)) {
      $converted = array();

      foreach ($str as $key => $value) {
        // nested arrays are handled by the recursive call
        $converted[$key] = self::to_win1252($value);
      }

      return $converted;
    }

    $input = (string)$str;

    // nothing to convert for an empty string
    if ($input === '') {
      return '';
    }

    return self::utf8_decode($input);
  }
6298
6299
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function could only be used for strings / chars that are pure 7-bit (ASCII),
   * but checking for ASCII costs more time than we could save here.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted (INF) or falsy,
   *                         Unicode separators (\pZ) and "other" characters (\pC) are stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // a custom character list is delegated to the one-sided helpers
    $hasCustomChars = !($chars === INF || !$chars);
    if ($hasCustomChars) {
      $leftTrimmed = self::ltrim($str, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6327
6328
  /**
   * Makes the first character of a string uppercase.
   *
   * The first character is taken with "UTF8::substr()" and converted with
   * "UTF8::strtoupper()"; the rest of the string is appended unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $firstUpper = self::strtoupper(self::substr($str, 0, 1));
    $remainder = self::substr($str, 1);

    return $firstUpper . $remainder;
  }
6339
6340
  /**
   * Alias for "UTF8::ucfirst()": uppercases only the first character of the string.
   *
   * @param string $str The input string
   *
   * @return string The result of "UTF8::ucfirst()"
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
6351
6352
  /**
   * Uppercase the first character of every word in the string.
   *
   * Words are the pieces obtained by splitting on a single space (' ').
   * A word that is strictly equal to an entry of $exceptions is left untouched,
   * but the first character of the final result is always uppercased.
   *
   * @param  string $str        The input string
   * @param  array  $exceptions Words that must not be changed
   *
   * @return string The converted string, or '' for empty input
   */
  public static function ucwords($str, $exceptions = array())
  {
    // Cast first and test for emptiness by length: a plain truthiness check
    // would wrongly treat the string "0" as empty and return ''.
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;

    $newwords = array();
    foreach (explode(' ', $str) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    // the first character of the whole string is uppercased even if the first word is an exception
    return self::ucfirst(implode(' ', $newwords));
  }
6393
6394
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str The (possibly multiply) encoded input
   *
   * @return string The decoded string, or '' for empty input
   */
  public static function urldecode($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // url-decode once, then rewrite "%uXXXX" escapes (3-4 hex digits) as hexadecimal html entities
    $urlDecoded = urldecode($input);
    $withEntities = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $urlDecoded);

    // ENT_HTML5 is only used when Bootup reports PHP >= 5.4
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // pipeline: to UTF-8 -> decode html entities -> raw url-decode -> repair simple UTF-8 errors
    $asUtf8 = self::to_utf8($withEntities);
    $entityDecoded = self::html_entity_decode($asUtf8, $flags);
    $rawDecoded = rawurldecode($entityDecoded);
    $fixed = self::fix_simple_utf8($rawDecoded);

    return (string)$fixed;
  }
6434
6435
  /**
   * Return an array that maps "urlencoded"-win1252 sequences ('%20' - '%FF') to UTF-8 strings.
   *
   * The table is held in a static local variable, so it is only built once.
   *
   * NOTE(review): '%7F', '%81', '%8D', '%8F', '%90', '%9D', '%A0' and '%AD' map to an
   * empty string in this table - confirm that this is intentional.
   *
   * @return array Keys are upper-case percent-escapes, values are the UTF-8 replacements.
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        '%80' => "\xe2\x82\xac", // EURO SIGN (0x80 in Windows-1252), was wrongly mapped to a backtick
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6671
6672
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalised with "UTF8::to_utf8()", then the sequences listed in
   * "self::$utf8ToWin1252" are replaced, and finally the result is decoded by the
   * "Xml::utf8_decode()" polyfill.
   *
   * @param string $str The UTF-8 input
   *
   * @return string The decoded string, or '' for empty input
   */
  public static function utf8_decode($str)
  {
    // search / replace lists derived from the mapping table, built on first use only
    static $search = null;
    static $replace = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($input);

    if (null === $search) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $utf8);

    return Xml::utf8_decode($mapped);
  }
6702
6703
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "utf8_encode()" call, the sequences listed in "self::$cp1252ToUtf8"
   * are replaced - but only if the encoded string contains the byte 0xC2 at all.
   *
   * @param string $str The ISO-8859-1 input
   *
   * @return string The UTF-8 encoded string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists derived from the mapping table, built on first use only
    static $search = null;
    static $replace = null;

    $encoded = \utf8_encode($str);

    // without a 0xC2 byte there is nothing the mapping table could replace
    // NOTE(review): presumably every key of $cp1252ToUtf8 starts with 0xC2 - confirm against the table
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if (null === $search) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6729
6730
  /**
   * fix -> utf8-win1252 chars
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as if it were
   * ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F), use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to "UTF8::fix_simple_utf8()".
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str The string to repair
   *
   * @return  string The result of "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6747
6748
  /**
   * Returns the class-level table of utf8 whitespace characters ("self::$whitespaceTable").
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6762
6763
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string holds more than
   * $words words, it is cut after the last allowed word, right-trimmed and $strAddOn
   * is appended; otherwise the string is returned unchanged.
   *
   * @param  string $str      The input string
   * @param  int    $words    Maximum number of words to keep; values below 1 yield ''
   * @param  string $strAddOn Suffix appended only when the string was shortened
   *
   * @return string The (possibly shortened) string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    $limit = (int)$words;

    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace followed by 1..$limit words, each with its trailing whitespace
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $text, $found);

    // shorten only if something matched and the match does not already cover the whole string
    if (isset($found[0]) && self::strlen($text) !== self::strlen($found[0])) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $text;
  }
6798
6799
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is split into characters with "UTF8::split()" and the wrapping positions
   * are computed by the native "wordwrap()" on a single-byte stand-in string, so the
   * width is applied per character rather than per byte.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>
   *                      The input string.
   *                      </p>
   * @param int    $width [optional] <p>
   *                      The column width.
   *                      </p>
   * @param string $break [optional] <p>
   *                      The line is broken using the optional
   *                      break parameter.
   *                      </p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string the given string wrapped at the specified column,
   *                or '' if the string or the break sequence is empty.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    $segments = explode($break, $str);
    $segmentCount = count($segments);

    if ($segmentCount === 1 && $segments[0] === '') {
      return '';
    }

    // $chars holds the real characters (an existing break sequence counts as one entry);
    // $mask mirrors it with one byte per entry: '#' for a break, ' ' for a space, '?' otherwise
    $chars = array();
    $mask = '';

    foreach ($segments as $index => $segment) {
      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    // let the native function decide where to wrap, using '#' as the break marker
    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskPos = -1;
    $breakPos = -1;

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
      // copy every character that precedes this break marker
      ++$maskPos;
      while ($maskPos < $breakPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskPos;
      }

      // an original break sequence or a space at the break position is consumed by the break
      if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left after the last break marker is appended unchanged
    return $wrapped . implode('', $chars);
  }
6880
6881
  /**
   * Returns the class-level array of Unicode White Space characters ("self::$whitespace").
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6890
6891
}
6892