Completed
Push — master ( f39b34...54357b )
by Lars
12:58 queued 01:58
created

UTF8::str_sort()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 3.4326

Importance

Changes 3
Bugs 1 Features 0
Metric Value
c 3
b 1
f 0
dl 0
loc 16
ccs 7
cts 11
cp 0.6364
rs 9.4285
cc 3
eloc 9
nc 4
nop 3
crap 3.4326
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Numeric code point => UTF-8 Character
83
   *
84
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
85
   *
86
   * @var array
87
   */
88
  protected static $whitespace = array(
89
    // NUL Byte
90
    0     => "\x0",
91
    // Tab
92
    9     => "\x9",
93
    // New Line
94
    10    => "\xa",
95
    // Vertical Tab
96
    11    => "\xb",
97
    // Carriage Return
98
    13    => "\xd",
99
    // Ordinary Space
100
    32    => "\x20",
101
    // NO-BREAK SPACE
102
    160   => "\xc2\xa0",
103
    // OGHAM SPACE MARK
104
    5760  => "\xe1\x9a\x80",
105
    // MONGOLIAN VOWEL SEPARATOR
106
    6158  => "\xe1\xa0\x8e",
107
    // EN QUAD
108
    8192  => "\xe2\x80\x80",
109
    // EM QUAD
110
    8193  => "\xe2\x80\x81",
111
    // EN SPACE
112
    8194  => "\xe2\x80\x82",
113
    // EM SPACE
114
    8195  => "\xe2\x80\x83",
115
    // THREE-PER-EM SPACE
116
    8196  => "\xe2\x80\x84",
117
    // FOUR-PER-EM SPACE
118
    8197  => "\xe2\x80\x85",
119
    // SIX-PER-EM SPACE
120
    8198  => "\xe2\x80\x86",
121
    // FIGURE SPACE
122
    8199  => "\xe2\x80\x87",
123
    // PUNCTUATION SPACE
124
    8200  => "\xe2\x80\x88",
125
    // THIN SPACE
126
    8201  => "\xe2\x80\x89",
127
    // HAIR SPACE
128
    8202  => "\xe2\x80\x8a",
129
    // LINE SEPARATOR
130
    8232  => "\xe2\x80\xa8",
131
    // PARAGRAPH SEPARATOR
132
    8233  => "\xe2\x80\xa9",
133
    // NARROW NO-BREAK SPACE
134
    8239  => "\xe2\x80\xaf",
135
    // MEDIUM MATHEMATICAL SPACE
136
    8287  => "\xe2\x81\x9f",
137
    // IDEOGRAPHIC SPACE
138
    12288 => "\xe3\x80\x80",
139
  );
140
141
  /**
142
   * @var array
143
   */
144
  protected static $whitespaceTable = array(
145
      'SPACE'                     => "\x20",
146
      'NO-BREAK SPACE'            => "\xc2\xa0",
147
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
148
      'EN QUAD'                   => "\xe2\x80\x80",
149
      'EM QUAD'                   => "\xe2\x80\x81",
150
      'EN SPACE'                  => "\xe2\x80\x82",
151
      'EM SPACE'                  => "\xe2\x80\x83",
152
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
153
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
154
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
155
      'FIGURE SPACE'              => "\xe2\x80\x87",
156
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
157
      'THIN SPACE'                => "\xe2\x80\x89",
158
      'HAIR SPACE'                => "\xe2\x80\x8a",
159
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
160
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
161
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
162
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
163
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
164
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
165
  );
166
167
  /**
168
   * bidirectional text chars
169
   *
170
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
171
   *
172
   * @var array
173
   */
174
  protected static $bidiUniCodeControlsTable = array(
175
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
176
    8234 => "\xE2\x80\xAA",
177
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
178
    8235 => "\xE2\x80\xAB",
179
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
180
    8236 => "\xE2\x80\xAC",
181
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
182
    8237 => "\xE2\x80\xAD",
183
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
184
    8238 => "\xE2\x80\xAE",
185
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
186
    8294 => "\xE2\x81\xA6",
187
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
188
    8295 => "\xE2\x81\xA7",
189
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
190
    8296 => "\xE2\x81\xA8",
191
    // POP DIRECTIONAL ISOLATE
192
    8297 => "\xE2\x81\xA9",
193
  );
194
195
  /**
196
   * @var array
197
   */
198
  protected static $commonCaseFold = array(
199
      'ſ'            => 's',
200
      "\xCD\x85"     => 'ι',
201
      'ς'            => 'σ',
202
      "\xCF\x90"     => 'β',
203
      "\xCF\x91"     => 'θ',
204
      "\xCF\x95"     => 'φ',
205
      "\xCF\x96"     => 'π',
206
      "\xCF\xB0"     => 'κ',
207
      "\xCF\xB1"     => 'ρ',
208
      "\xCF\xB5"     => 'ε',
209
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
210
      "\xE1\xBE\xBE" => 'ι',
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $brokenUtf8ToUtf8 = array(
217
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
218
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
219
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
220
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
221
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
222
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
223
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
224
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
225
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
226
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
227
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
228
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
229
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
230
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
231
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
232
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
233
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
234
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
235
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
236
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
237
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
238
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
239
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
240
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
241
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
242
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
243
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
244
      'ü'       => 'ü',
245
      'ä'       => 'ä',
246
      'ö'       => 'ö',
247
      'Ö'       => 'Ö',
248
      'ß'       => 'ß',
249
      'Ã '       => 'à',
250
      'á'       => 'á',
251
      'â'       => 'â',
252
      'ã'       => 'ã',
253
      'ù'       => 'ù',
254
      'ú'       => 'ú',
255
      'û'       => 'û',
256
      'Ù'       => 'Ù',
257
      'Ú'       => 'Ú',
258
      'Û'       => 'Û',
259
      'Ü'       => 'Ü',
260
      'ò'       => 'ò',
261
      'ó'       => 'ó',
262
      'ô'       => 'ô',
263
      'è'       => 'è',
264
      'é'       => 'é',
265
      'ê'       => 'ê',
266
      'ë'       => 'ë',
267
      'À'       => 'À',
268
      'Á'       => 'Á',
269
      'Â'       => 'Â',
270
      'Ã'       => 'Ã',
271
      'Ä'       => 'Ä',
272
      'Ã…'       => 'Å',
273
      'Ç'       => 'Ç',
274
      'È'       => 'È',
275
      'É'       => 'É',
276
      'Ê'       => 'Ê',
277
      'Ë'       => 'Ë',
278
      'ÃŒ'       => 'Ì',
279
      'Í'       => 'Í',
280
      'ÃŽ'       => 'Î',
281
      'Ï'       => 'Ï',
282
      'Ñ'       => 'Ñ',
283
      'Ã’'       => 'Ò',
284
      'Ó'       => 'Ó',
285
      'Ô'       => 'Ô',
286
      'Õ'       => 'Õ',
287
      'Ø'       => 'Ø',
288
      'Ã¥'       => 'å',
289
      'æ'       => 'æ',
290
      'ç'       => 'ç',
291
      'ì'       => 'ì',
292
      'í'       => 'í',
293
      'î'       => 'î',
294
      'ï'       => 'ï',
295
      'ð'       => 'ð',
296
      'ñ'       => 'ñ',
297
      'õ'       => 'õ',
298
      'ø'       => 'ø',
299
      'ý'       => 'ý',
300
      'ÿ'       => 'ÿ',
301
      '€'      => '€',
302
  );
303
304
  /**
305
   * @var array
306
   */
307
  protected static $utf8ToWin1252 = array(
308
      "\xe2\x82\xac" => "\x80", // EURO SIGN
309
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
310
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
311
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
312
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
313
      "\xe2\x80\xa0" => "\x86", // DAGGER
314
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
315
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
316
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
317
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
318
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
319
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
320
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
321
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
322
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
323
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
324
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
325
      "\xe2\x80\xa2" => "\x95", // BULLET
326
      "\xe2\x80\x93" => "\x96", // EN DASH
327
      "\xe2\x80\x94" => "\x97", // EM DASH
328
      "\xcb\x9c"     => "\x98", // SMALL TILDE
329
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
330
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
331
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
332
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
333
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
334
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
335
  );
336
337
  /**
338
   * @var array
339
   */
340
  protected static $utf8MSWord = array(
341
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
342
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
343
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
344
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
345
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
346
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
347
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
348
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
349
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
350
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
351
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
352
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
353
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
354
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
355
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
356
  );
357
358
  protected static $iconvEncoding = array(
359
      'ANSI_X3.4-1968',
360
      'ANSI_X3.4-1986',
361
      'ASCII',
362
      'CP367',
363
      'IBM367',
364
      'ISO-IR-6',
365
      'ISO646-US',
366
      'ISO_646.IRV:1991',
367
      'US',
368
      'US-ASCII',
369
      'CSASCII',
370
      'UTF-8',
371
      'ISO-10646-UCS-2',
372
      'UCS-2',
373
      'CSUNICODE',
374
      'UCS-2BE',
375
      'UNICODE-1-1',
376
      'UNICODEBIG',
377
      'CSUNICODE11',
378
      'UCS-2LE',
379
      'UNICODELITTLE',
380
      'ISO-10646-UCS-4',
381
      'UCS-4',
382
      'CSUCS4',
383
      'UCS-4BE',
384
      'UCS-4LE',
385
      'UTF-16',
386
      'UTF-16BE',
387
      'UTF-16LE',
388
      'UTF-32',
389
      'UTF-32BE',
390
      'UTF-32LE',
391
      'UNICODE-1-1-UTF-7',
392
      'UTF-7',
393
      'CSUNICODE11UTF7',
394
      'UCS-2-INTERNAL',
395
      'UCS-2-SWAPPED',
396
      'UCS-4-INTERNAL',
397
      'UCS-4-SWAPPED',
398
      'C99',
399
      'JAVA',
400
      'CP819',
401
      'IBM819',
402
      'ISO-8859-1',
403
      'ISO-IR-100',
404
      'ISO8859-1',
405
      'ISO_8859-1',
406
      'ISO_8859-1:1987',
407
      'L1',
408
      'LATIN1',
409
      'CSISOLATIN1',
410
      'ISO-8859-2',
411
      'ISO-IR-101',
412
      'ISO8859-2',
413
      'ISO_8859-2',
414
      'ISO_8859-2:1987',
415
      'L2',
416
      'LATIN2',
417
      'CSISOLATIN2',
418
      'ISO-8859-3',
419
      'ISO-IR-109',
420
      'ISO8859-3',
421
      'ISO_8859-3',
422
      'ISO_8859-3:1988',
423
      'L3',
424
      'LATIN3',
425
      'CSISOLATIN3',
426
      'ISO-8859-4',
427
      'ISO-IR-110',
428
      'ISO8859-4',
429
      'ISO_8859-4',
430
      'ISO_8859-4:1988',
431
      'L4',
432
      'LATIN4',
433
      'CSISOLATIN4',
434
      'CYRILLIC',
435
      'ISO-8859-5',
436
      'ISO-IR-144',
437
      'ISO8859-5',
438
      'ISO_8859-5',
439
      'ISO_8859-5:1988',
440
      'CSISOLATINCYRILLIC',
441
      'ARABIC',
442
      'ASMO-708',
443
      'ECMA-114',
444
      'ISO-8859-6',
445
      'ISO-IR-127',
446
      'ISO8859-6',
447
      'ISO_8859-6',
448
      'ISO_8859-6:1987',
449
      'CSISOLATINARABIC',
450
      'ECMA-118',
451
      'ELOT_928',
452
      'GREEK',
453
      'GREEK8',
454
      'ISO-8859-7',
455
      'ISO-IR-126',
456
      'ISO8859-7',
457
      'ISO_8859-7',
458
      'ISO_8859-7:1987',
459
      'ISO_8859-7:2003',
460
      'CSISOLATINGREEK',
461
      'HEBREW',
462
      'ISO-8859-8',
463
      'ISO-IR-138',
464
      'ISO8859-8',
465
      'ISO_8859-8',
466
      'ISO_8859-8:1988',
467
      'CSISOLATINHEBREW',
468
      'ISO-8859-9',
469
      'ISO-IR-148',
470
      'ISO8859-9',
471
      'ISO_8859-9',
472
      'ISO_8859-9:1989',
473
      'L5',
474
      'LATIN5',
475
      'CSISOLATIN5',
476
      'ISO-8859-10',
477
      'ISO-IR-157',
478
      'ISO8859-10',
479
      'ISO_8859-10',
480
      'ISO_8859-10:1992',
481
      'L6',
482
      'LATIN6',
483
      'CSISOLATIN6',
484
      'ISO-8859-11',
485
      'ISO8859-11',
486
      'ISO_8859-11',
487
      'ISO-8859-13',
488
      'ISO-IR-179',
489
      'ISO8859-13',
490
      'ISO_8859-13',
491
      'L7',
492
      'LATIN7',
493
      'ISO-8859-14',
494
      'ISO-CELTIC',
495
      'ISO-IR-199',
496
      'ISO8859-14',
497
      'ISO_8859-14',
498
      'ISO_8859-14:1998',
499
      'L8',
500
      'LATIN8',
501
      'ISO-8859-15',
502
      'ISO-IR-203',
503
      'ISO8859-15',
504
      'ISO_8859-15',
505
      'ISO_8859-15:1998',
506
      'LATIN-9',
507
      'ISO-8859-16',
508
      'ISO-IR-226',
509
      'ISO8859-16',
510
      'ISO_8859-16',
511
      'ISO_8859-16:2001',
512
      'L10',
513
      'LATIN10',
514
      'KOI8-R',
515
      'CSKOI8R',
516
      'KOI8-U',
517
      'KOI8-RU',
518
      'CP1250',
519
      'MS-EE',
520
      'WINDOWS-1250',
521
      'CP1251',
522
      'MS-CYRL',
523
      'WINDOWS-1251',
524
      'CP1252',
525
      'MS-ANSI',
526
      'WINDOWS-1252',
527
      'CP1253',
528
      'MS-GREEK',
529
      'WINDOWS-1253',
530
      'CP1254',
531
      'MS-TURK',
532
      'WINDOWS-1254',
533
      'CP1255',
534
      'MS-HEBR',
535
      'WINDOWS-1255',
536
      'CP1256',
537
      'MS-ARAB',
538
      'WINDOWS-1256',
539
      'CP1257',
540
      'WINBALTRIM',
541
      'WINDOWS-1257',
542
      'CP1258',
543
      'WINDOWS-1258',
544
      '850',
545
      'CP850',
546
      'IBM850',
547
      'CSPC850MULTILINGUAL',
548
      '862',
549
      'CP862',
550
      'IBM862',
551
      'CSPC862LATINHEBREW',
552
      '866',
553
      'CP866',
554
      'IBM866',
555
      'CSIBM866',
556
      'MAC',
557
      'MACINTOSH',
558
      'MACROMAN',
559
      'CSMACINTOSH',
560
      'MACCENTRALEUROPE',
561
      'MACICELAND',
562
      'MACCROATIAN',
563
      'MACROMANIA',
564
      'MACCYRILLIC',
565
      'MACUKRAINE',
566
      'MACGREEK',
567
      'MACTURKISH',
568
      'MACHEBREW',
569
      'MACARABIC',
570
      'MACTHAI',
571
      'HP-ROMAN8',
572
      'R8',
573
      'ROMAN8',
574
      'CSHPROMAN8',
575
      'NEXTSTEP',
576
      'ARMSCII-8',
577
      'GEORGIAN-ACADEMY',
578
      'GEORGIAN-PS',
579
      'KOI8-T',
580
      'CP154',
581
      'CYRILLIC-ASIAN',
582
      'PT154',
583
      'PTCP154',
584
      'CSPTCP154',
585
      'KZ-1048',
586
      'RK1048',
587
      'STRK1048-2002',
588
      'CSKZ1048',
589
      'MULELAO-1',
590
      'CP1133',
591
      'IBM-CP1133',
592
      'ISO-IR-166',
593
      'TIS-620',
594
      'TIS620',
595
      'TIS620-0',
596
      'TIS620.2529-1',
597
      'TIS620.2533-0',
598
      'TIS620.2533-1',
599
      'CP874',
600
      'WINDOWS-874',
601
      'VISCII',
602
      'VISCII1.1-1',
603
      'CSVISCII',
604
      'TCVN',
605
      'TCVN-5712',
606
      'TCVN5712-1',
607
      'TCVN5712-1:1993',
608
      'ISO-IR-14',
609
      'ISO646-JP',
610
      'JIS_C6220-1969-RO',
611
      'JP',
612
      'CSISO14JISC6220RO',
613
      'JISX0201-1976',
614
      'JIS_X0201',
615
      'X0201',
616
      'CSHALFWIDTHKATAKANA',
617
      'ISO-IR-87',
618
      'JIS0208',
619
      'JIS_C6226-1983',
620
      'JIS_X0208',
621
      'JIS_X0208-1983',
622
      'JIS_X0208-1990',
623
      'X0208',
624
      'CSISO87JISX0208',
625
      'ISO-IR-159',
626
      'JIS_X0212',
627
      'JIS_X0212-1990',
628
      'JIS_X0212.1990-0',
629
      'X0212',
630
      'CSISO159JISX02121990',
631
      'CN',
632
      'GB_1988-80',
633
      'ISO-IR-57',
634
      'ISO646-CN',
635
      'CSISO57GB1988',
636
      'CHINESE',
637
      'GB_2312-80',
638
      'ISO-IR-58',
639
      'CSISO58GB231280',
640
      'CN-GB-ISOIR165',
641
      'ISO-IR-165',
642
      'ISO-IR-149',
643
      'KOREAN',
644
      'KSC_5601',
645
      'KS_C_5601-1987',
646
      'KS_C_5601-1989',
647
      'CSKSC56011987',
648
      'EUC-JP',
649
      'EUCJP',
650
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
651
      'CSEUCPKDFMTJAPANESE',
652
      'MS_KANJI',
653
      'SHIFT-JIS',
654
      'SHIFT_JIS',
655
      'SJIS',
656
      'CSSHIFTJIS',
657
      'CP932',
658
      'ISO-2022-JP',
659
      'CSISO2022JP',
660
      'ISO-2022-JP-1',
661
      'ISO-2022-JP-2',
662
      'CSISO2022JP2',
663
      'CN-GB',
664
      'EUC-CN',
665
      'EUCCN',
666
      'GB2312',
667
      'CSGB2312',
668
      'GBK',
669
      'CP936',
670
      'MS936',
671
      'WINDOWS-936',
672
      'GB18030',
673
      'ISO-2022-CN',
674
      'CSISO2022CN',
675
      'ISO-2022-CN-EXT',
676
      'HZ',
677
      'HZ-GB-2312',
678
      'EUC-TW',
679
      'EUCTW',
680
      'CSEUCTW',
681
      'BIG-5',
682
      'BIG-FIVE',
683
      'BIG5',
684
      'BIGFIVE',
685
      'CN-BIG5',
686
      'CSBIG5',
687
      'CP950',
688
      'BIG5-HKSCS:1999',
689
      'BIG5-HKSCS:2001',
690
      'BIG5-HKSCS',
691
      'BIG5-HKSCS:2004',
692
      'BIG5HKSCS',
693
      'EUC-KR',
694
      'EUCKR',
695
      'CSEUCKR',
696
      'CP949',
697
      'UHC',
698
      'CP1361',
699
      'JOHAB',
700
      'ISO-2022-KR',
701
      'CSISO2022KR',
702
      'CP856',
703
      'CP922',
704
      'CP943',
705
      'CP1046',
706
      'CP1124',
707
      'CP1129',
708
      'CP1161',
709
      'IBM-1161',
710
      'IBM1161',
711
      'CSIBM1161',
712
      'CP1162',
713
      'IBM-1162',
714
      'IBM1162',
715
      'CSIBM1162',
716
      'CP1163',
717
      'IBM-1163',
718
      'IBM1163',
719
      'CSIBM1163',
720
      'DEC-KANJI',
721
      'DEC-HANYU',
722
      '437',
723
      'CP437',
724
      'IBM437',
725
      'CSPC8CODEPAGE437',
726
      'CP737',
727
      'CP775',
728
      'IBM775',
729
      'CSPC775BALTIC',
730
      '852',
731
      'CP852',
732
      'IBM852',
733
      'CSPCP852',
734
      'CP853',
735
      '855',
736
      'CP855',
737
      'IBM855',
738
      'CSIBM855',
739
      '857',
740
      'CP857',
741
      'IBM857',
742
      'CSIBM857',
743
      'CP858',
744
      '860',
745
      'CP860',
746
      'IBM860',
747
      'CSIBM860',
748
      '861',
749
      'CP-IS',
750
      'CP861',
751
      'IBM861',
752
      'CSIBM861',
753
      '863',
754
      'CP863',
755
      'IBM863',
756
      'CSIBM863',
757
      'CP864',
758
      'IBM864',
759
      'CSIBM864',
760
      '865',
761
      'CP865',
762
      'IBM865',
763
      'CSIBM865',
764
      '869',
765
      'CP-GR',
766
      'CP869',
767
      'IBM869',
768
      'CSIBM869',
769
      'CP1125',
770
      'EUC-JISX0213',
771
      'SHIFT_JISX0213',
772
      'ISO-2022-JP-3',
773
      'BIG5-2003',
774
      'ISO-IR-230',
775
      'TDS565',
776
      'ATARI',
777
      'ATARIST',
778
      'RISCOS-LATIN1',
779
  );
780
781
  /**
782
   * @var array
783
   */
784
  private static $support = array();
785
786
  /**
   * Constructor.
   *
   * Runs self::checkForSupport() every time an instance is created.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
793 1
794
  /**
   * Returns a single UTF-8 character from a string.
   *
   * Works like $str[$pos], but delegates to self::substr() with a length of 1
   * instead of indexing the string directly.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of the character to return.
   *
   * @return   string Single Multi-Byte character (whatever self::substr() yields for a length of 1).
   */
  public static function access($str, $pos)
  {
    // Return the character at the specified position: $str[1] like functionality.
    return self::substr($str, $pos, 1);
  }
808
809
  /**
   * Prepends the BOM character to the string and returns the whole string.
   *
   * INFO: If self::is_bom() already accepts the first three bytes of the
   * input, the input string is returned unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that contains BOM
   */
  public static function add_bom_to_string($str)
  {
    // The BOM returned by self::bom() is three bytes long, so only the first
    // three bytes of the input are inspected.
    if (!self::is_bom(substr($str, 0, 3))) {
      $str = self::bom() . $str;
    }

    return $str;
  }
826
827
  /**
   * Returns the Byte Order Mark Character.
   *
   * @return   string Byte Order Mark: the three bytes 0xEF 0xBB 0xBF
   */
  public static function bom()
  {
    return "\xEF\xBB\xBF";
  }
836
837
  /**
   * @alias of UTF8::chr_map()
   *
   * Forwards both arguments, unchanged and in the same order, to
   * self::chr_map() and returns its result.
   *
   * @param mixed $callback Passed through as the first argument of chr_map()
   *                        (presumably a callable - confirm against chr_map()).
   * @param mixed $str      Passed through as the second argument of chr_map()
   *                        (presumably a UTF-8 string - confirm against chr_map()).
   *
   * @return array Whatever UTF8::chr_map() returns for the given arguments.
   */
  public static function callback($callback, $str)
  {
    return self::chr_map($callback, $str);
  }
849
850
  /**
851
   * Returns an array of all lower and upper case UTF-8 encoded characters.
852
   *
853
   * @return   array An array with lower case chars as keys and upper chars as values.
854
   */
855
  protected static function case_table()
856
  {
857
    static $case = array(
858
859
      // lower => upper
860
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
861
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
862
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
863
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
864
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
865
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
866
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
867
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
868
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
869
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
870
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
871
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
872
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
873
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
874
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
875
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
876
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
877
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
878
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
879
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
880
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
881
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
882
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
883
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
884
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
885
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
886
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
887
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
888
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
889
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
890
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
891
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
892
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
893
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
894
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
895
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
896
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
897
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
898
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
899
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
900
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
901
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
902
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
903
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
904
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
905
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
906
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
907
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
908
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
909
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
910
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
911
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
912
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
913
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
914
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
915
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
916
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
917
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
918
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
919
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
920
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
921
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
922
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
923
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
924
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
925
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
926
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
927
      "\xea\x9e\x87"     => "\xea\x9e\x86",
928
      "\xea\x9e\x85"     => "\xea\x9e\x84",
929
      "\xea\x9e\x83"     => "\xea\x9e\x82",
930
      "\xea\x9e\x81"     => "\xea\x9e\x80",
931
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
932
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
933
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
934
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
935
      "\xea\x9d\xad"     => "\xea\x9d\xac",
936
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
937
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
938
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
939
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
940
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
941
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
942
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
943
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
944
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
945
      "\xea\x9d\x99"     => "\xea\x9d\x98",
946
      "\xea\x9d\x97"     => "\xea\x9d\x96",
947
      "\xea\x9d\x95"     => "\xea\x9d\x94",
948
      "\xea\x9d\x93"     => "\xea\x9d\x92",
949
      "\xea\x9d\x91"     => "\xea\x9d\x90",
950
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
951
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
952
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
953
      "\xea\x9d\x89"     => "\xea\x9d\x88",
954
      "\xea\x9d\x87"     => "\xea\x9d\x86",
955
      "\xea\x9d\x85"     => "\xea\x9d\x84",
956
      "\xea\x9d\x83"     => "\xea\x9d\x82",
957
      "\xea\x9d\x81"     => "\xea\x9d\x80",
958
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
959
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
960
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
961
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
962
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
963
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
964
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
965
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
966
      "\xea\x9c\xad"     => "\xea\x9c\xac",
967
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
968
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
969
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
970
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
971
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
972
      "\xea\x9a\x97"     => "\xea\x9a\x96",
973
      "\xea\x9a\x95"     => "\xea\x9a\x94",
974
      "\xea\x9a\x93"     => "\xea\x9a\x92",
975
      "\xea\x9a\x91"     => "\xea\x9a\x90",
976
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
977
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
978
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
979
      "\xea\x9a\x89"     => "\xea\x9a\x88",
980
      "\xea\x9a\x87"     => "\xea\x9a\x86",
981
      "\xea\x9a\x85"     => "\xea\x9a\x84",
982
      "\xea\x9a\x83"     => "\xea\x9a\x82",
983
      "\xea\x9a\x81"     => "\xea\x9a\x80",
984
      "\xea\x99\xad"     => "\xea\x99\xac",
985
      "\xea\x99\xab"     => "\xea\x99\xaa",
986
      "\xea\x99\xa9"     => "\xea\x99\xa8",
987
      "\xea\x99\xa7"     => "\xea\x99\xa6",
988
      "\xea\x99\xa5"     => "\xea\x99\xa4",
989
      "\xea\x99\xa3"     => "\xea\x99\xa2",
990
      "\xea\x99\x9f"     => "\xea\x99\x9e",
991
      "\xea\x99\x9d"     => "\xea\x99\x9c",
992
      "\xea\x99\x9b"     => "\xea\x99\x9a",
993
      "\xea\x99\x99"     => "\xea\x99\x98",
994
      "\xea\x99\x97"     => "\xea\x99\x96",
995
      "\xea\x99\x95"     => "\xea\x99\x94",
996
      "\xea\x99\x93"     => "\xea\x99\x92",
997
      "\xea\x99\x91"     => "\xea\x99\x90",
998
      "\xea\x99\x8f"     => "\xea\x99\x8e",
999
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1000
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1001
      "\xea\x99\x89"     => "\xea\x99\x88",
1002
      "\xea\x99\x87"     => "\xea\x99\x86",
1003
      "\xea\x99\x85"     => "\xea\x99\x84",
1004
      "\xea\x99\x83"     => "\xea\x99\x82",
1005
      "\xea\x99\x81"     => "\xea\x99\x80",
1006
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1007
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1008
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1009
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1010
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1011
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1012
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1013
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1014
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1015
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1016
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1017
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1018
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1019
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1020
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1021
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1022
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1023
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1024
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1025
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1026
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1027
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1028
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1029
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1030
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1031
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1032
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1033
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1034
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1035
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1036
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1037
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1038
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1039
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1040
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1041
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1042
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1043
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1044
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1045
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1046
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1047
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1048
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1049
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1050
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1051
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1052
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1053
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1054
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1055
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1056
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1057
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1058
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1059
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1060
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1061
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1062
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1063
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1064
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1065
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1066
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1067
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1068
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1069
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1070
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1071
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1072
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1073
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1074
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1075
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1076
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1077
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1078
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1079
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1080
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1081
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1082
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1083
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1084
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1085
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1086
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1087
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1088
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1089
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1090
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1091
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1092
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1093
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1094
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1095
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1096
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1097
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1098
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1099
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1100
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1101
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1102
      "\xe2\xb1\xa5"     => "\xc8\xba",
1103
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1104
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1105
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1106
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1107
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1108
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1109
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1110
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1111
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1112
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1113
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1114
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1115
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1116
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1117
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1118
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1119
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1120
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1121
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1122
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1123
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1124
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1125
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1126
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1127
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1128
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1129
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1130
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1131
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1132
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1133
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1134
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1135
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1136
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1137
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1138
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1139
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1140
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1141
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1142
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1143
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1144
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1145
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1146
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1147
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1148
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1149
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1150
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1151
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1152
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1153
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1154
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1155
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1156
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1157
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1158
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1159
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1160
      "\xe1\xbe\xbe"     => "\xce\x99",
1161
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1162
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1163
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1164
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1165
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1166
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1167
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1168
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1169
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1170
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1171
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1172
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1173
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1174
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1175
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1176
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1177
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1178
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1179
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1180
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1181
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1182
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1183
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1184
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1185
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1186
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1187
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1188
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1189
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1190
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1191
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1192
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1193
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1194
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1195
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1196
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1197
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1198
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1199
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1200
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1201
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1202
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1203
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1204
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1205
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1206
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1207
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1208
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1209
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1210
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1211
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1212
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1213
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1214
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1215
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1216
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1217
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1218
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1219
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1220
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1221
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1222
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1223
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1224
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1225
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1226
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1227
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1228
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1229
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1230
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1231
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1232
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1233
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1234
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1235
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1236
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1237
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1238
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1239
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1240
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1241
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1242
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1243
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1244
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1245
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1246
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1247
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1248
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1249
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1250
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1251
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1252
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1253
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1254
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1255
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1256
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1257
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1258
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1259
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1260
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1261
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1262
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1263
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1264
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1265
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1266
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1267
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1268
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1269
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1270
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1271
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1272
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1273
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1274
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1275
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1276
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1277
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1278
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1279
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1280
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1281
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1282
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1283
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1284
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1285
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1286
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1287
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1288
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1289
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1290
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1291
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1292
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1293
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1294
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1295
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1296
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1297
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1298
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1299
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1300
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1301
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1302
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1303
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1304
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1305
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1306
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1307
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1308
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1309
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1310
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1311
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1312
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1313
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1314
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1315
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1316
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1317
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1318
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1319
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1320
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1321
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1322
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1323
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1324
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1325
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1326
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1327
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1328
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1329
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1330
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1331
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1332
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1333
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1334
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1335
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1336
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1337
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1338
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1339
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1340
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1341
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1342
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1343
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1344
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1345
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1346
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1347
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1348
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1349
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1350
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1351
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1352
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1353
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1354
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1355
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1356
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1357
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1358
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1359
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1360
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1361
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1362
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1363
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1364
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1365
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1366
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1367
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1368
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1369
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1370
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1371
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1372
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1373
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1374
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1375
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1376
      "\xd6\x86"         => "\xd5\x96",
1377
      "\xd6\x85"         => "\xd5\x95",
1378
      "\xd6\x84"         => "\xd5\x94",
1379
      "\xd6\x83"         => "\xd5\x93",
1380
      "\xd6\x82"         => "\xd5\x92",
1381
      "\xd6\x81"         => "\xd5\x91",
1382
      "\xd6\x80"         => "\xd5\x90",
1383
      "\xd5\xbf"         => "\xd5\x8f",
1384
      "\xd5\xbe"         => "\xd5\x8e",
1385
      "\xd5\xbd"         => "\xd5\x8d",
1386
      "\xd5\xbc"         => "\xd5\x8c",
1387
      "\xd5\xbb"         => "\xd5\x8b",
1388
      "\xd5\xba"         => "\xd5\x8a",
1389
      "\xd5\xb9"         => "\xd5\x89",
1390
      "\xd5\xb8"         => "\xd5\x88",
1391
      "\xd5\xb7"         => "\xd5\x87",
1392
      "\xd5\xb6"         => "\xd5\x86",
1393
      "\xd5\xb5"         => "\xd5\x85",
1394
      "\xd5\xb4"         => "\xd5\x84",
1395
      "\xd5\xb3"         => "\xd5\x83",
1396
      "\xd5\xb2"         => "\xd5\x82",
1397
      "\xd5\xb1"         => "\xd5\x81",
1398
      "\xd5\xb0"         => "\xd5\x80",
1399
      "\xd5\xaf"         => "\xd4\xbf",
1400
      "\xd5\xae"         => "\xd4\xbe",
1401
      "\xd5\xad"         => "\xd4\xbd",
1402
      "\xd5\xac"         => "\xd4\xbc",
1403
      "\xd5\xab"         => "\xd4\xbb",
1404
      "\xd5\xaa"         => "\xd4\xba",
1405
      "\xd5\xa9"         => "\xd4\xb9",
1406
      "\xd5\xa8"         => "\xd4\xb8",
1407
      "\xd5\xa7"         => "\xd4\xb7",
1408
      "\xd5\xa6"         => "\xd4\xb6",
1409
      "\xd5\xa5"         => "\xd4\xb5",
1410
      "\xd5\xa4"         => "\xd4\xb4",
1411
      "\xd5\xa3"         => "\xd4\xb3",
1412
      "\xd5\xa2"         => "\xd4\xb2",
1413
      "\xd5\xa1"         => "\xd4\xb1",
1414
      "\xd4\xa5"         => "\xd4\xa4",
1415
      "\xd4\xa3"         => "\xd4\xa2",
1416
      "\xd4\xa1"         => "\xd4\xa0",
1417
      "\xd4\x9f"         => "\xd4\x9e",
1418
      "\xd4\x9d"         => "\xd4\x9c",
1419
      "\xd4\x9b"         => "\xd4\x9a",
1420
      "\xd4\x99"         => "\xd4\x98",
1421
      "\xd4\x97"         => "\xd4\x96",
1422
      "\xd4\x95"         => "\xd4\x94",
1423
      "\xd4\x93"         => "\xd4\x92",
1424
      "\xd4\x91"         => "\xd4\x90",
1425
      "\xd4\x8f"         => "\xd4\x8e",
1426
      "\xd4\x8d"         => "\xd4\x8c",
1427
      "\xd4\x8b"         => "\xd4\x8a",
1428
      "\xd4\x89"         => "\xd4\x88",
1429
      "\xd4\x87"         => "\xd4\x86",
1430
      "\xd4\x85"         => "\xd4\x84",
1431
      "\xd4\x83"         => "\xd4\x82",
1432
      "\xd4\x81"         => "\xd4\x80",
1433
      "\xd3\xbf"         => "\xd3\xbe",
1434
      "\xd3\xbd"         => "\xd3\xbc",
1435
      "\xd3\xbb"         => "\xd3\xba",
1436
      "\xd3\xb9"         => "\xd3\xb8",
1437
      "\xd3\xb7"         => "\xd3\xb6",
1438
      "\xd3\xb5"         => "\xd3\xb4",
1439
      "\xd3\xb3"         => "\xd3\xb2",
1440
      "\xd3\xb1"         => "\xd3\xb0",
1441
      "\xd3\xaf"         => "\xd3\xae",
1442
      "\xd3\xad"         => "\xd3\xac",
1443
      "\xd3\xab"         => "\xd3\xaa",
1444
      "\xd3\xa9"         => "\xd3\xa8",
1445
      "\xd3\xa7"         => "\xd3\xa6",
1446
      "\xd3\xa5"         => "\xd3\xa4",
1447
      "\xd3\xa3"         => "\xd3\xa2",
1448
      "\xd3\xa1"         => "\xd3\xa0",
1449
      "\xd3\x9f"         => "\xd3\x9e",
1450
      "\xd3\x9d"         => "\xd3\x9c",
1451
      "\xd3\x9b"         => "\xd3\x9a",
1452
      "\xd3\x99"         => "\xd3\x98",
1453
      "\xd3\x97"         => "\xd3\x96",
1454
      "\xd3\x95"         => "\xd3\x94",
1455
      "\xd3\x93"         => "\xd3\x92",
1456
      "\xd3\x91"         => "\xd3\x90",
1457
      "\xd3\x8f"         => "\xd3\x80",
1458
      "\xd3\x8e"         => "\xd3\x8d",
1459
      "\xd3\x8c"         => "\xd3\x8b",
1460
      "\xd3\x8a"         => "\xd3\x89",
1461
      "\xd3\x88"         => "\xd3\x87",
1462
      "\xd3\x86"         => "\xd3\x85",
1463
      "\xd3\x84"         => "\xd3\x83",
1464
      "\xd3\x82"         => "\xd3\x81",
1465
      "\xd2\xbf"         => "\xd2\xbe",
1466
      "\xd2\xbd"         => "\xd2\xbc",
1467
      "\xd2\xbb"         => "\xd2\xba",
1468
      "\xd2\xb9"         => "\xd2\xb8",
1469
      "\xd2\xb7"         => "\xd2\xb6",
1470
      "\xd2\xb5"         => "\xd2\xb4",
1471
      "\xd2\xb3"         => "\xd2\xb2",
1472
      "\xd2\xb1"         => "\xd2\xb0",
1473
      "\xd2\xaf"         => "\xd2\xae",
1474
      "\xd2\xad"         => "\xd2\xac",
1475
      "\xd2\xab"         => "\xd2\xaa",
1476
      "\xd2\xa9"         => "\xd2\xa8",
1477
      "\xd2\xa7"         => "\xd2\xa6",
1478
      "\xd2\xa5"         => "\xd2\xa4",
1479
      "\xd2\xa3"         => "\xd2\xa2",
1480
      "\xd2\xa1"         => "\xd2\xa0",
1481
      "\xd2\x9f"         => "\xd2\x9e",
1482
      "\xd2\x9d"         => "\xd2\x9c",
1483
      "\xd2\x9b"         => "\xd2\x9a",
1484
      "\xd2\x99"         => "\xd2\x98",
1485
      "\xd2\x97"         => "\xd2\x96",
1486
      "\xd2\x95"         => "\xd2\x94",
1487
      "\xd2\x93"         => "\xd2\x92",
1488
      "\xd2\x91"         => "\xd2\x90",
1489
      "\xd2\x8f"         => "\xd2\x8e",
1490
      "\xd2\x8d"         => "\xd2\x8c",
1491
      "\xd2\x8b"         => "\xd2\x8a",
1492
      "\xd2\x81"         => "\xd2\x80",
1493
      "\xd1\xbf"         => "\xd1\xbe",
1494
      "\xd1\xbd"         => "\xd1\xbc",
1495
      "\xd1\xbb"         => "\xd1\xba",
1496
      "\xd1\xb9"         => "\xd1\xb8",
1497
      "\xd1\xb7"         => "\xd1\xb6",
1498
      "\xd1\xb5"         => "\xd1\xb4",
1499
      "\xd1\xb3"         => "\xd1\xb2",
1500
      "\xd1\xb1"         => "\xd1\xb0",
1501
      "\xd1\xaf"         => "\xd1\xae",
1502
      "\xd1\xad"         => "\xd1\xac",
1503
      "\xd1\xab"         => "\xd1\xaa",
1504
      "\xd1\xa9"         => "\xd1\xa8",
1505
      "\xd1\xa7"         => "\xd1\xa6",
1506
      "\xd1\xa5"         => "\xd1\xa4",
1507
      "\xd1\xa3"         => "\xd1\xa2",
1508
      "\xd1\xa1"         => "\xd1\xa0",
1509
      "\xd1\x9f"         => "\xd0\x8f",
1510
      "\xd1\x9e"         => "\xd0\x8e",
1511
      "\xd1\x9d"         => "\xd0\x8d",
1512
      "\xd1\x9c"         => "\xd0\x8c",
1513
      "\xd1\x9b"         => "\xd0\x8b",
1514
      "\xd1\x9a"         => "\xd0\x8a",
1515
      "\xd1\x99"         => "\xd0\x89",
1516
      "\xd1\x98"         => "\xd0\x88",
1517
      "\xd1\x97"         => "\xd0\x87",
1518
      "\xd1\x96"         => "\xd0\x86",
1519
      "\xd1\x95"         => "\xd0\x85",
1520
      "\xd1\x94"         => "\xd0\x84",
1521
      "\xd1\x93"         => "\xd0\x83",
1522
      "\xd1\x92"         => "\xd0\x82",
1523
      "\xd1\x91"         => "\xd0\x81",
1524
      "\xd1\x90"         => "\xd0\x80",
1525
      "\xd1\x8f"         => "\xd0\xaf",
1526
      "\xd1\x8e"         => "\xd0\xae",
1527
      "\xd1\x8d"         => "\xd0\xad",
1528
      "\xd1\x8c"         => "\xd0\xac",
1529
      "\xd1\x8b"         => "\xd0\xab",
1530
      "\xd1\x8a"         => "\xd0\xaa",
1531
      "\xd1\x89"         => "\xd0\xa9",
1532
      "\xd1\x88"         => "\xd0\xa8",
1533
      "\xd1\x87"         => "\xd0\xa7",
1534
      "\xd1\x86"         => "\xd0\xa6",
1535
      "\xd1\x85"         => "\xd0\xa5",
1536
      "\xd1\x84"         => "\xd0\xa4",
1537
      "\xd1\x83"         => "\xd0\xa3",
1538
      "\xd1\x82"         => "\xd0\xa2",
1539
      "\xd1\x81"         => "\xd0\xa1",
1540
      "\xd1\x80"         => "\xd0\xa0",
1541
      "\xd0\xbf"         => "\xd0\x9f",
1542
      "\xd0\xbe"         => "\xd0\x9e",
1543
      "\xd0\xbd"         => "\xd0\x9d",
1544
      "\xd0\xbc"         => "\xd0\x9c",
1545
      "\xd0\xbb"         => "\xd0\x9b",
1546
      "\xd0\xba"         => "\xd0\x9a",
1547
      "\xd0\xb9"         => "\xd0\x99",
1548
      "\xd0\xb8"         => "\xd0\x98",
1549
      "\xd0\xb7"         => "\xd0\x97",
1550
      "\xd0\xb6"         => "\xd0\x96",
1551
      "\xd0\xb5"         => "\xd0\x95",
1552
      "\xd0\xb4"         => "\xd0\x94",
1553
      "\xd0\xb3"         => "\xd0\x93",
1554
      "\xd0\xb2"         => "\xd0\x92",
1555
      "\xd0\xb1"         => "\xd0\x91",
1556
      "\xd0\xb0"         => "\xd0\x90",
1557
      "\xcf\xbb"         => "\xcf\xba",
1558
      "\xcf\xb8"         => "\xcf\xb7",
1559
      "\xcf\xb5"         => "\xce\x95",
1560
      "\xcf\xb2"         => "\xcf\xb9",
1561
      "\xcf\xb1"         => "\xce\xa1",
1562
      "\xcf\xb0"         => "\xce\x9a",
1563
      "\xcf\xaf"         => "\xcf\xae",
1564
      "\xcf\xad"         => "\xcf\xac",
1565
      "\xcf\xab"         => "\xcf\xaa",
1566
      "\xcf\xa9"         => "\xcf\xa8",
1567
      "\xcf\xa7"         => "\xcf\xa6",
1568
      "\xcf\xa5"         => "\xcf\xa4",
1569
      "\xcf\xa3"         => "\xcf\xa2",
1570
      "\xcf\xa1"         => "\xcf\xa0",
1571
      "\xcf\x9f"         => "\xcf\x9e",
1572
      "\xcf\x9d"         => "\xcf\x9c",
1573
      "\xcf\x9b"         => "\xcf\x9a",
1574
      "\xcf\x99"         => "\xcf\x98",
1575
      "\xcf\x97"         => "\xcf\x8f",
1576
      "\xcf\x96"         => "\xce\xa0",
1577
      "\xcf\x95"         => "\xce\xa6",
1578
      "\xcf\x91"         => "\xce\x98",
1579
      "\xcf\x90"         => "\xce\x92",
1580
      "\xcf\x8e"         => "\xce\x8f",
1581
      "\xcf\x8d"         => "\xce\x8e",
1582
      "\xcf\x8c"         => "\xce\x8c",
1583
      "\xcf\x8b"         => "\xce\xab",
1584
      "\xcf\x8a"         => "\xce\xaa",
1585
      "\xcf\x89"         => "\xce\xa9",
1586
      "\xcf\x88"         => "\xce\xa8",
1587
      "\xcf\x87"         => "\xce\xa7",
1588
      "\xcf\x86"         => "\xce\xa6",
1589
      "\xcf\x85"         => "\xce\xa5",
1590
      "\xcf\x84"         => "\xce\xa4",
1591
      "\xcf\x83"         => "\xce\xa3",
1592
      "\xcf\x82"         => "\xce\xa3",
1593
      "\xcf\x81"         => "\xce\xa1",
1594
      "\xcf\x80"         => "\xce\xa0",
1595
      "\xce\xbf"         => "\xce\x9f",
1596
      "\xce\xbe"         => "\xce\x9e",
1597
      "\xce\xbd"         => "\xce\x9d",
1598
      "\xce\xbc"         => "\xce\x9c",
1599
      "\xce\xbb"         => "\xce\x9b",
1600
      "\xce\xba"         => "\xce\x9a",
1601
      "\xce\xb9"         => "\xce\x99",
1602
      "\xce\xb8"         => "\xce\x98",
1603
      "\xce\xb7"         => "\xce\x97",
1604
      "\xce\xb6"         => "\xce\x96",
1605
      "\xce\xb5"         => "\xce\x95",
1606
      "\xce\xb4"         => "\xce\x94",
1607
      "\xce\xb3"         => "\xce\x93",
1608
      "\xce\xb2"         => "\xce\x92",
1609
      "\xce\xb1"         => "\xce\x91",
1610
      "\xce\xaf"         => "\xce\x8a",
1611
      "\xce\xae"         => "\xce\x89",
1612
      "\xce\xad"         => "\xce\x88",
1613
      "\xce\xac"         => "\xce\x86",
1614
      "\xcd\xbd"         => "\xcf\xbf",
1615
      "\xcd\xbc"         => "\xcf\xbe",
1616
      "\xcd\xbb"         => "\xcf\xbd",
1617
      "\xcd\xb7"         => "\xcd\xb6",
1618
      "\xcd\xb3"         => "\xcd\xb2",
1619
      "\xcd\xb1"         => "\xcd\xb0",
1620
      "\xca\x92"         => "\xc6\xb7",
1621
      "\xca\x8c"         => "\xc9\x85",
1622
      "\xca\x8b"         => "\xc6\xb2",
1623
      "\xca\x8a"         => "\xc6\xb1",
1624
      "\xca\x89"         => "\xc9\x84",
1625
      "\xca\x88"         => "\xc6\xae",
1626
      "\xca\x83"         => "\xc6\xa9",
1627
      "\xca\x80"         => "\xc6\xa6",
1628
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1629
      "\xc9\xb5"         => "\xc6\x9f",
1630
      "\xc9\xb2"         => "\xc6\x9d",
1631
      "\xc9\xb1"         => "\xe2\xb1\xae",
1632
      "\xc9\xaf"         => "\xc6\x9c",
1633
      "\xc9\xab"         => "\xe2\xb1\xa2",
1634
      "\xc9\xa9"         => "\xc6\x96",
1635
      "\xc9\xa8"         => "\xc6\x97",
1636
      "\xc9\xa5"         => "\xea\x9e\x8d",
1637
      "\xc9\xa3"         => "\xc6\x94",
1638
      "\xc9\xa0"         => "\xc6\x93",
1639
      "\xc9\x9b"         => "\xc6\x90",
1640
      "\xc9\x99"         => "\xc6\x8f",
1641
      "\xc9\x97"         => "\xc6\x8a",
1642
      "\xc9\x96"         => "\xc6\x89",
1643
      "\xc9\x94"         => "\xc6\x86",
1644
      "\xc9\x93"         => "\xc6\x81",
1645
      "\xc9\x92"         => "\xe2\xb1\xb0",
1646
      "\xc9\x91"         => "\xe2\xb1\xad",
1647
      "\xc9\x90"         => "\xe2\xb1\xaf",
1648
      "\xc9\x8f"         => "\xc9\x8e",
1649
      "\xc9\x8d"         => "\xc9\x8c",
1650
      "\xc9\x8b"         => "\xc9\x8a",
1651
      "\xc9\x89"         => "\xc9\x88",
1652
      "\xc9\x87"         => "\xc9\x86",
1653
      "\xc9\x82"         => "\xc9\x81",
1654
      "\xc9\x80"         => "\xe2\xb1\xbf",
1655
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1656
      "\xc8\xbc"         => "\xc8\xbb",
1657
      "\xc8\xb3"         => "\xc8\xb2",
1658
      "\xc8\xb1"         => "\xc8\xb0",
1659
      "\xc8\xaf"         => "\xc8\xae",
1660
      "\xc8\xad"         => "\xc8\xac",
1661
      "\xc8\xab"         => "\xc8\xaa",
1662
      "\xc8\xa9"         => "\xc8\xa8",
1663
      "\xc8\xa7"         => "\xc8\xa6",
1664
      "\xc8\xa5"         => "\xc8\xa4",
1665
      "\xc8\xa3"         => "\xc8\xa2",
1666
      "\xc8\x9f"         => "\xc8\x9e",
1667
      "\xc8\x9d"         => "\xc8\x9c",
1668
      "\xc8\x9b"         => "\xc8\x9a",
1669
      "\xc8\x99"         => "\xc8\x98",
1670
      "\xc8\x97"         => "\xc8\x96",
1671
      "\xc8\x95"         => "\xc8\x94",
1672
      "\xc8\x93"         => "\xc8\x92",
1673
      "\xc8\x91"         => "\xc8\x90",
1674
      "\xc8\x8f"         => "\xc8\x8e",
1675
      "\xc8\x8d"         => "\xc8\x8c",
1676
      "\xc8\x8b"         => "\xc8\x8a",
1677
      "\xc8\x89"         => "\xc8\x88",
1678
      "\xc8\x87"         => "\xc8\x86",
1679
      "\xc8\x85"         => "\xc8\x84",
1680
      "\xc8\x83"         => "\xc8\x82",
1681
      "\xc8\x81"         => "\xc8\x80",
1682
      "\xc7\xbf"         => "\xc7\xbe",
1683
      "\xc7\xbd"         => "\xc7\xbc",
1684
      "\xc7\xbb"         => "\xc7\xba",
1685
      "\xc7\xb9"         => "\xc7\xb8",
1686
      "\xc7\xb5"         => "\xc7\xb4",
1687
      "\xc7\xb3"         => "\xc7\xb2",
1688
      "\xc7\xaf"         => "\xc7\xae",
1689
      "\xc7\xad"         => "\xc7\xac",
1690
      "\xc7\xab"         => "\xc7\xaa",
1691
      "\xc7\xa9"         => "\xc7\xa8",
1692
      "\xc7\xa7"         => "\xc7\xa6",
1693
      "\xc7\xa5"         => "\xc7\xa4",
1694
      "\xc7\xa3"         => "\xc7\xa2",
1695
      "\xc7\xa1"         => "\xc7\xa0",
1696
      "\xc7\x9f"         => "\xc7\x9e",
1697
      "\xc7\x9d"         => "\xc6\x8e",
1698
      "\xc7\x9c"         => "\xc7\x9b",
1699
      "\xc7\x9a"         => "\xc7\x99",
1700
      "\xc7\x98"         => "\xc7\x97",
1701
      "\xc7\x96"         => "\xc7\x95",
1702
      "\xc7\x94"         => "\xc7\x93",
1703
      "\xc7\x92"         => "\xc7\x91",
1704
      "\xc7\x90"         => "\xc7\x8f",
1705
      "\xc7\x8e"         => "\xc7\x8d",
1706
      "\xc7\x8c"         => "\xc7\x8b",
1707
      "\xc7\x89"         => "\xc7\x88",
1708
      "\xc7\x86"         => "\xc7\x85",
1709
      "\xc6\xbf"         => "\xc7\xb7",
1710
      "\xc6\xbd"         => "\xc6\xbc",
1711
      "\xc6\xb9"         => "\xc6\xb8",
1712
      "\xc6\xb6"         => "\xc6\xb5",
1713
      "\xc6\xb4"         => "\xc6\xb3",
1714
      "\xc6\xb0"         => "\xc6\xaf",
1715
      "\xc6\xad"         => "\xc6\xac",
1716
      "\xc6\xa8"         => "\xc6\xa7",
1717
      "\xc6\xa5"         => "\xc6\xa4",
1718
      "\xc6\xa3"         => "\xc6\xa2",
1719
      "\xc6\xa1"         => "\xc6\xa0",
1720
      "\xc6\x9e"         => "\xc8\xa0",
1721
      "\xc6\x9a"         => "\xc8\xbd",
1722
      "\xc6\x99"         => "\xc6\x98",
1723
      "\xc6\x95"         => "\xc7\xb6",
1724
      "\xc6\x92"         => "\xc6\x91",
1725
      "\xc6\x8c"         => "\xc6\x8b",
1726
      "\xc6\x88"         => "\xc6\x87",
1727
      "\xc6\x85"         => "\xc6\x84",
1728
      "\xc6\x83"         => "\xc6\x82",
1729
      "\xc6\x80"         => "\xc9\x83",
1730
      "\xc5\xbf"         => "\x53",
1731
      "\xc5\xbe"         => "\xc5\xbd",
1732
      "\xc5\xbc"         => "\xc5\xbb",
1733
      "\xc5\xba"         => "\xc5\xb9",
1734
      "\xc5\xb7"         => "\xc5\xb6",
1735
      "\xc5\xb5"         => "\xc5\xb4",
1736
      "\xc5\xb3"         => "\xc5\xb2",
1737
      "\xc5\xb1"         => "\xc5\xb0",
1738
      "\xc5\xaf"         => "\xc5\xae",
1739
      "\xc5\xad"         => "\xc5\xac",
1740
      "\xc5\xab"         => "\xc5\xaa",
1741
      "\xc5\xa9"         => "\xc5\xa8",
1742
      "\xc5\xa7"         => "\xc5\xa6",
1743
      "\xc5\xa5"         => "\xc5\xa4",
1744
      "\xc5\xa3"         => "\xc5\xa2",
1745
      "\xc5\xa1"         => "\xc5\xa0",
1746
      "\xc5\x9f"         => "\xc5\x9e",
1747
      "\xc5\x9d"         => "\xc5\x9c",
1748
      "\xc5\x9b"         => "\xc5\x9a",
1749
      "\xc5\x99"         => "\xc5\x98",
1750
      "\xc5\x97"         => "\xc5\x96",
1751
      "\xc5\x95"         => "\xc5\x94",
1752
      "\xc5\x93"         => "\xc5\x92",
1753
      "\xc5\x91"         => "\xc5\x90",
1754
      "\xc5\x8f"         => "\xc5\x8e",
1755
      "\xc5\x8d"         => "\xc5\x8c",
1756
      "\xc5\x8b"         => "\xc5\x8a",
1757
      "\xc5\x88"         => "\xc5\x87",
1758
      "\xc5\x86"         => "\xc5\x85",
1759
      "\xc5\x84"         => "\xc5\x83",
1760
      "\xc5\x82"         => "\xc5\x81",
1761
      "\xc5\x80"         => "\xc4\xbf",
1762
      "\xc4\xbe"         => "\xc4\xbd",
1763
      "\xc4\xbc"         => "\xc4\xbb",
1764
      "\xc4\xba"         => "\xc4\xb9",
1765
      "\xc4\xb7"         => "\xc4\xb6",
1766
      "\xc4\xb5"         => "\xc4\xb4",
1767
      "\xc4\xb3"         => "\xc4\xb2",
1768
      "\xc4\xb1"         => "\x49",
1769
      "\xc4\xaf"         => "\xc4\xae",
1770
      "\xc4\xad"         => "\xc4\xac",
1771
      "\xc4\xab"         => "\xc4\xaa",
1772
      "\xc4\xa9"         => "\xc4\xa8",
1773
      "\xc4\xa7"         => "\xc4\xa6",
1774
      "\xc4\xa5"         => "\xc4\xa4",
1775
      "\xc4\xa3"         => "\xc4\xa2",
1776
      "\xc4\xa1"         => "\xc4\xa0",
1777
      "\xc4\x9f"         => "\xc4\x9e",
1778
      "\xc4\x9d"         => "\xc4\x9c",
1779
      "\xc4\x9b"         => "\xc4\x9a",
1780
      "\xc4\x99"         => "\xc4\x98",
1781
      "\xc4\x97"         => "\xc4\x96",
1782
      "\xc4\x95"         => "\xc4\x94",
1783
      "\xc4\x93"         => "\xc4\x92",
1784
      "\xc4\x91"         => "\xc4\x90",
1785
      "\xc4\x8f"         => "\xc4\x8e",
1786
      "\xc4\x8d"         => "\xc4\x8c",
1787
      "\xc4\x8b"         => "\xc4\x8a",
1788
      "\xc4\x89"         => "\xc4\x88",
1789
      "\xc4\x87"         => "\xc4\x86",
1790
      "\xc4\x85"         => "\xc4\x84",
1791
      "\xc4\x83"         => "\xc4\x82",
1792
      "\xc4\x81"         => "\xc4\x80",
1793
      "\xc3\xbf"         => "\xc5\xb8",
1794
      "\xc3\xbe"         => "\xc3\x9e",
1795
      "\xc3\xbd"         => "\xc3\x9d",
1796
      "\xc3\xbc"         => "\xc3\x9c",
1797
      "\xc3\xbb"         => "\xc3\x9b",
1798
      "\xc3\xba"         => "\xc3\x9a",
1799
      "\xc3\xb9"         => "\xc3\x99",
1800
      "\xc3\xb8"         => "\xc3\x98",
1801
      "\xc3\xb6"         => "\xc3\x96",
1802
      "\xc3\xb5"         => "\xc3\x95",
1803
      "\xc3\xb4"         => "\xc3\x94",
1804
      "\xc3\xb3"         => "\xc3\x93",
1805
      "\xc3\xb2"         => "\xc3\x92",
1806
      "\xc3\xb1"         => "\xc3\x91",
1807
      "\xc3\xb0"         => "\xc3\x90",
1808
      "\xc3\xaf"         => "\xc3\x8f",
1809
      "\xc3\xae"         => "\xc3\x8e",
1810
      "\xc3\xad"         => "\xc3\x8d",
1811
      "\xc3\xac"         => "\xc3\x8c",
1812
      "\xc3\xab"         => "\xc3\x8b",
1813
      "\xc3\xaa"         => "\xc3\x8a",
1814
      "\xc3\xa9"         => "\xc3\x89",
1815
      "\xc3\xa8"         => "\xc3\x88",
1816
      "\xc3\xa7"         => "\xc3\x87",
1817
      "\xc3\xa6"         => "\xc3\x86",
1818
      "\xc3\xa5"         => "\xc3\x85",
1819
      "\xc3\xa4"         => "\xc3\x84",
1820
      "\xc3\xa3"         => "\xc3\x83",
1821
      "\xc3\xa2"         => "\xc3\x82",
1822
      "\xc3\xa1"         => "\xc3\x81",
1823
      "\xc3\xa0"         => "\xc3\x80",
1824
      "\xc2\xb5"         => "\xce\x9c",
1825
      "\x7a"             => "\x5a",
1826
      "\x79"             => "\x59",
1827
      "\x78"             => "\x58",
1828
      "\x77"             => "\x57",
1829
      "\x76"             => "\x56",
1830
      "\x75"             => "\x55",
1831
      "\x74"             => "\x54",
1832
      "\x73"             => "\x53",
1833
      "\x72"             => "\x52",
1834
      "\x71"             => "\x51",
1835
      "\x70"             => "\x50",
1836
      "\x6f"             => "\x4f",
1837
      "\x6e"             => "\x4e",
1838
      "\x6d"             => "\x4d",
1839
      "\x6c"             => "\x4c",
1840
      "\x6b"             => "\x4b",
1841
      "\x6a"             => "\x4a",
1842
      "\x69"             => "\x49",
1843
      "\x68"             => "\x48",
1844
      "\x67"             => "\x47",
1845
      "\x66"             => "\x46",
1846
      "\x65"             => "\x45",
1847
      "\x64"             => "\x44",
1848
      "\x63"             => "\x43",
1849
      "\x62"             => "\x42",
1850
      "\x61"             => "\x41",
1851
1852
    );
1853
1854
    return $case;
1855
  }
1856
1857
  /**
   * Detects the UTF-8 related runtime features once and caches the result.
   *
   * The detection only runs while self::$support['mbstring'] is not set yet;
   * later calls reuse the flags already stored in self::$support.
   */
  public static function checkForSupport()
  {
    // already detected: keep the cached flags
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1871
1872
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * When the "intlChar" support flag is set, the value is handed to
   * \IntlChar::chr() unchanged. Otherwise the value is converted to an integer
   * (via hex_to_int() when it is not already an integer) and decoded through
   * a numeric HTML entity.
   *
   * @param    int $code_point The code point for which to generate a character.
   *
   * @return   string Multi-Byte character, returns empty string on failure to encode.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      // \IntlChar::chr() reports failure with null, but this method promises
      // an empty string on failure, so normalise the result here.
      $char = \IntlChar::chr($code_point);

      return $char === null ? '' : $char;
    }

    $i = (int)$code_point;

    // anything that is not already an integer is treated as a hex notation
    if ($i !== $code_point) {
      $i = (int)self::hex_to_int($code_point);
    }

    if (!$i) {
      return '';
    }

    return self::html_entity_decode("&#{$i};", ENT_QUOTES);
  }
1899
1900
  /**
   * Runs a callback on every character of a string.
   *
   * The string is cut into characters with split() and the callback is applied
   * to each of them through array_map().
   *
   * @param    string $callback The callback function (anything array_map() accepts as a callback).
   * @param    string $str      UTF-8 string to run callback on.
   *
   * @return   array The callback results, one entry per character.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1915
1916
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character; empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Test for "no first byte" instead of truthiness: the string "0" is falsy
    // in PHP but is a valid one-character input.
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1936
1937 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the sequence; its payload
   * bits and the payload bits of the following bytes are then combined into
   * one integer. The input is expected to hold the complete sequence:
   * continuation bytes are read without a length check.
   *
   * @param   string $chr The input character
   *
   * @return  int The decoded value; 0 for an empty input.
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    // nothing to decode; also avoids reading offset 0 of an empty string
    if (!isset($chr[0])) {
      return 0;
    }

    $code = self::ord($chr[0]);

    if (($code & 0x80) === 0) {
      // 0xxxxxxx
      return $code;
    }

    $bytes = 1;

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    // append the payload of every following byte (10xxxxxx)
    for ($offset = 1; $offset < $bytes; $offset++) {
      $code = ($code << 6) + (self::ord($chr[$offset]) & ~0x80);
    }

    return $code;
  }
1976
1977
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The character is turned into its numeric value with ord() and that value
   * is formatted by int_to_hex().
   *
   * @param    string $chr  The input character
   * @param    string $pfix Prefix handed to int_to_hex(), "U+" by default.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $code_point = self::ord($chr);

    return self::int_to_hex($code_point, $pfix);
  }
1989
1990
  /**
   * Splits a string into smaller chunks and joins them with the specified
   * line ending character(s).
   *
   * The chunks come from split(); the line ending is placed between the chunks
   * (implode), so nothing is appended after the last chunk.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
2004
2005
  /**
   * Accepts a string and removes all non-UTF-8 characters from it.
   *
   * Steps, in this order: drop bytes that are not part of a well-formed
   * sequence, remove the replacement character ("diamond question mark"),
   * remove invisible characters, then apply the optional normalisations.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to pass the result through removeBOM()
   * @param bool   $normalize_whitespace    set true, to pass the result through normalize_whitespace()
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences, groups 2 and 3 capture
    // stray bytes; replacing every match with "$1" keeps only group 1
    $validUtf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = preg_replace($validUtf8Pattern, '$1', $str);

    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::removeBOM($cleaned);
    }

    return $cleaned;
  }
2051
2052 3
  /**
   * Cleans up a string so that only printable UTF-8 characters remain.
   *
   * @param string $str
   *
   * @return string The cleaned string; empty string for empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // clean() is called so that it will:
    // - remove all non UTF-8 symbols
    // - remove the diamond question mark (�)
    // - remove invisible characters (e.g. "\0")
    // - remove the BOM
    // - normalize whitespace chars (but keep non-breaking-spaces)
    return (string)self::clean($fixed, true, true, false, true);
  }
2079
2080
  /**
   * Accepts a string and returns an array of Unicode code points.
   *
   * A string is first cut into characters with split(); an array is used as
   * given. Every element is then mapped through ord() and, if requested,
   * through int_to_hex().
   *
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool  $u_style If True, will return code points in U+xxxx format,
   *                          default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $class = '\\voku\\helper\\UTF8';

    $chars = is_string($arg) ? self::split($arg) : $arg;
    $codes = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $codes;
    }

    return array_map(array($class, 'int_to_hex'), $codes);
  }
2115
2116
  /**
   * Returns count of characters used in a string.
   *
   * The string is cut into characters with split(), the occurrences are
   * counted and the result is sorted by key.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array of Character as keys and
   *           their count as values.
   */
  public static function count_chars($str) // there is no $mode parameters
  {
    $occurrences = array_count_values(self::split($str));

    ksort($occurrences);

    return $occurrences;
  }
2132
2133 1
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity which is then converted
   * to UTF-8 by mb_convert_encoding().
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2150
2151
  /**
   * Encode a string.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function on a UTF-8 string as well without messing it up.
   *
   * The input is returned unchanged when it or the encoding name is empty,
   * when the current encoding cannot be detected, when $force is off and the
   * string is already in the target encoding, or when the conversion yields
   * no usable result.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
   *                         otherwise we auto-detect the current string-encoding
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // detection failed (false / empty): there is no source encoding to convert from
    if (!is_string($encodingDetected) || $encodingDetected === '') {
      return $str;
    }

    // without $force, a string that already is in the target encoding is left alone
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    self::checkForSupport();

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_win1252($str);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // Compare explicitly instead of by truthiness: a converted result of "0"
    // is valid and must not be replaced by the unconverted input.
    if (is_string($strEncoded) && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
2225
2226
  /**
   * Callback function for preg_replace_callback use.
   *
   * Decodes the matched HTML entity to UTF-8, except that a result of a single
   * quote is returned as the entity "&#x27;" instead.
   *
   * @param  array $matches PREG matches
   *
   * @return string
   */
  protected static function entityCallback($matches)
  {
    self::checkForSupport();

    $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    // a decoded single quote is handed back in its encoded form
    return $decoded === "'" ? '&#x27;' : $decoded;
  }
2245
2246
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string   $filename      <p>
   *                                Name of the file to read. The name is passed through
   *                                filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   *                                </p>
   * @param int      $flags         [optional] <p>
   *                                Handed to the native function as its second argument
   *                                (use_include_path); FILE_USE_INCLUDE_PATH can be used to
   *                                trigger the include path search. null means "not set".
   *                                </p>
   * @param resource $context       [optional] <p>
   *                                A valid context resource created with
   *                                stream_context_create. If null and $timeout is non-zero,
   *                                a context carrying the "http" timeout is created.
   *                                </p>
   * @param int      $offset        [optional] <p>
   *                                The offset where the reading starts. null means "from the start".
   *                                </p>
   * @param int      $maxlen        [optional] <p>
   *                                Maximum length of data read. The default is to read until end
   *                                of file is reached; only integer values are honoured.
   *                                </p>
   * @param int      $timeout       "http" timeout for the automatically created context
   *
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non
   *                                default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, so file
    // names containing such characters are altered before the read, and the filter
    // is deprecated as of PHP 8.1. Kept as is, because callers may rely on it.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // The native function takes scalar values here. Map "not given" (null) to
    // exactly what PHP coerces null to, so that no null reaches the scalar
    // parameters (deprecated since PHP 8.1).
    $useIncludePath = $flags === null ? false : $flags;
    $startOffset = $offset === null ? 0 : $offset;

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $startOffset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $startOffset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
2366
2367
  /**
   * Checks if a file starts with BOM character.
   *
   * Only the first three bytes of the file are read and handed to
   * self::is_bom().
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (including when the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    // Offset 0 = start of the file. The previous offset of -1 is interpreted
    // as "one byte before the end of the stream" since PHP 7.1, which made
    // this method inspect the last byte instead of the first three.
    $head = file_get_contents($file_path, false, null, 0, 3);

    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
2378
2379
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element /
   * property by property); strings are normalized; every other type is
   * returned untouched.
   *
   * @param mixed  $var                The value to filter.
   * @param int    $normalization_form Form passed to \Normalizer (default: 4).
   * @param string $leading_combining  Prefix put in front of a string that would
   *                                   otherwise start with a combining mark.
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    // Workaround https://bugs.php.net/65732
    if (false !== strpos($var, "\r")) {
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure 7-bit strings need no normalization at all.
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: "normalization is fine" for the check below.
      $normalized = '-';
    } else {
      $normalized = \Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // Normalization produced nothing usable: re-encode the input instead.
        $var = self::encode('UTF-8', $var);
      }
    }

    // Prevent leading combining chars for NFC-safe concatenations.
    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
2431
2432
  /**
   * "filter_input()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param int    $type   Input type, forwarded to the native filter_input().
   * @param string $var    Name of the variable to fetch.
   * @param int    $filter Filter id, forwarded unchanged.
   * @param mixed  $option Forwarded only when the caller supplied a 4th argument.
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Pass $option on only if it was really given, so the native function
    // is otherwise called with three arguments.
    $value = func_num_args() >= 4
        ? filter_input($type, $var, $filter, $option)
        : filter_input($type, $var, $filter);

    return self::filter($value);
  }
2452
2453
  /**
   * "filter_input_array()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param int   $type       Input type, forwarded to the native function.
   * @param mixed $definition Forwarded only when at least two arguments were given.
   * @param bool  $add_empty  Forwarded together with $definition.
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called with $type only.
    $values = func_num_args() >= 2
        ? filter_input_array($type, $definition, $add_empty)
        : filter_input_array($type);

    return self::filter($values);
  }
2472 8
2473
  /**
   * "filter_var()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param mixed $var    Value to filter.
   * @param int   $filter Filter id, forwarded unchanged.
   * @param mixed $option Forwarded only when the caller supplied a 3rd argument.
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Pass $option on only if it was really given.
    $value = func_num_args() >= 3
        ? filter_var($var, $filter, $option)
        : filter_var($var, $filter);

    return self::filter($value);
  }
2492
2493
  /**
   * "filter_var_array()"-wrapper whose result is passed through self::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param array $data       Values to filter.
   * @param mixed $definition Forwarded only when at least two arguments were given.
   * @param bool  $add_empty  Forwarded together with $definition.
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called with $data only.
    $values = func_num_args() >= 2
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    return self::filter($values);
  }
2512
2513 1
  /**
   * Checks that a string is not longer than a given number of characters.
   *
   * The length is measured with self::strlen().
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The size in number of chars to be checked against string.
   *
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2526
2527 1
  /**
   * Fixing a broken UTF-8 string.
   *
   * Every key of self::$brokenUtf8ToUtf8 found in the input is replaced by
   * its mapped value via str_replace().
   *
   * @param string $str
   *
   * @return string The repaired string ('' for empty input).
   */
  public static function fix_simple_utf8($str)
  {
    // Search/replace lists are derived from the map once and then reused.
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2552
2553
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are handled recursively, entry by entry (keys are kept).
   *
   * @param array|string $str
   *
   * @return array|string The fixed string, or the array with every entry fixed.
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $key => $value) {
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    // Decode + re-encode repeatedly until a pass no longer changes the value.
    $previous = '';
    while ($str !== $previous) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous));
    }

    return $str;
  }
2580
2581
  /**
   * Get the text direction of a specific character.
   *
   * When the "intlChar" support flag is set, \IntlChar::charDirection() is
   * asked first; if its answer is in neither direction list, or the flag is
   * not set, the code point is looked up in the built-in tables below.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    // Right-to-left code points up to U+085E: exact values ...
    static $lowSingles = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
    );
    // ... and inclusive [from, to] ranges.
    static $lowRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
    );
    // Right-to-left code points from U+FB1D upwards: exact values ...
    static $highSingles = array(
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );
    // ... and inclusive [from, to] ranges.
    static $highRanges = array(
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction codes from "IntlChar"-Class
      if (in_array($direction, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($direction, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // Everything outside U+05BE..U+10B7F is left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Pick the table that applies to this part of the code space.
    if (0x85e >= $c) {
      $singles = $lowSingles;
      $ranges = $lowRanges;
    } elseif (0x200f === $c) {
      return 'RTL';
    } elseif (0xfb1d <= $c) {
      $singles = $highSingles;
      $ranges = $highRanges;
    } else {
      return 'LTR';
    }

    if (in_array($c, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2699
2700
  /**
   * get data from "/data/*.php"
   *
   * The file is loaded with "require" and whatever it returns is handed back.
   *
   * @param string $file Base name of the data file (without directory and extension).
   *
   * @return bool|string|array|int false on error (file does not exist)
   */
  protected static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2717 1
2718 1
  /**
   * Creates a random string of UTF-8 characters.
   *
   * The alphabet is built once and cached: with the "pcre_utf8" support flag
   * it consists of the code points 48-79 that are numbers or upper/lower-case
   * letters, otherwise of 0-9, A-Z and a-z.
   *
   * NOTE: the characters are picked with mt_rand(), which is not suitable for
   * cryptographic purposes.
   *
   * @param    int $len The length of string in characters.
   *
   * @return   string String consisting of random characters ('' if $len <= 0).
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // Force an integer: with a fractional length (e.g. 1.5) the countdown
    // below would step over zero and never terminate.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (empty($chars)) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // Blank out everything that is not a number or a letter ...
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // ... and drop only those blanked entries. array_filter() without a
        // callback would also remove the (falsy) character "0".
        $chars = array_values(array_filter($chars, 'strlen'));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';
    $maxIndex = $chars_len - 1;

    // mt_rand($min, $max) picks the index directly, avoiding the modulo bias
    // of "mt_rand() % $chars_len".
    for (; $len > 0; --$len) {
      $hash .= $chars[mt_rand(0, $maxIndex)];
    }

    return $hash;
  }
2765
2766
  /**
   * Converts hexadecimal U+xxxx code point representation to Integer.
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with
   * "U+" or "\u" (case-insensitive), e.g. "U+00a7", "\u00a7" or "00A7".
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits are allowed. The former [a-z0-9] class let input
    // such as "U+12gh" through, which intval() then silently cut down to 0x12.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2783
2784
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * All three arguments are forwarded unchanged.
   *
   * @param string $str      The input string.
   * @param int    $flags    Flags for the decoding (null = let html_entity_decode() pick).
   * @param string $encoding Encoding to use.
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2797
2798 15
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * e.g.: &#123;&#39;&#1740;
   *
   * Uses mb_encode_numericentity() when it is available, otherwise every
   * character is encoded on its own via self::single_chr_html_encode().
   *
   * @param  string $str            The Unicode string to be encoded as numbered entities.
   * @param    bool $keepAsciiChars Keep ASCII chars.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false)
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // First code point to convert: skip the ASCII range on request.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // convmap = (start, end, offset, mask). The range has to reach the last
      // Unicode code point (U+10FFFF) and the mask has to keep all 21 bits,
      // otherwise characters above U+FFFF are left unencoded.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff),
          self::str_detect_encoding($str)
      );
    }

    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return self::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
2834 12
2835
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so the missing semicolons are added here first.
   *
   * The decoding is repeated until the string no longer changes, so
   * multiply-encoded entities are decoded as well.
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      The input string.
   * @param int    $flags    [optional] A bitmask of one or more of the following flags, which specify how to
   *                         handle quotes and which document type to use:
   *                         ENT_COMPAT   - convert double-quotes and leave single-quotes alone,
   *                         ENT_QUOTES   - convert both double and single quotes,
   *                         ENT_NOQUOTES - leave both double and single quotes unconverted,
   *                         ENT_HTML401  - handle code as HTML 4.01,
   *                         ENT_XML1     - handle code as XML 1,
   *                         ENT_XHTML    - handle code as XHTML,
   *                         ENT_HTML5    - handle code as HTML 5.
   *                         When null, ENT_COMPAT | ENT_HTML5 is used if Bootup::is_php('5.4')
   *                         is true, otherwise ENT_COMPAT.
   * @param string $encoding [optional] Encoding to use.
   *
   * @return string the decoded string.
   */
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Without an ampersand there cannot be any entity to decode.
    if (false === strpos($str, '&')) {
      return $str;
    }

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    while (true) {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", array('\voku\helper\UTF8', 'entityCallback'), $str);

      // append the missing semicolon to numeric entities ...
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      // ... and decode numeric & UTF16 two byte entities
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

      if ($before === $str) {
        break;
      }
    }

    return $str;
  }
2940
2941
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * Thin wrapper around the native htmlentities() whose encoding defaults to UTF-8.
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           The input string.
   * @param int    $flags         [optional] A bitmask of one or more of the following flags, which specify how
   *                              to handle quotes, invalid code unit sequences and the used document type:
   *                              ENT_COMPAT     - convert double-quotes and leave single-quotes alone,
   *                              ENT_QUOTES     - convert both double and single quotes,
   *                              ENT_NOQUOTES   - leave both double and single quotes unconverted,
   *                              ENT_IGNORE     - silently discard invalid code unit sequences instead of
   *                                               returning an empty string (discouraged, may have
   *                                               security implications),
   *                              ENT_SUBSTITUTE - replace invalid code unit sequences with a Unicode
   *                                               Replacement Character instead of returning an empty string,
   *                              ENT_DISALLOWED - replace invalid code points for the given document type
   *                                               with a Unicode Replacement Character,
   *                              ENT_HTML401    - handle code as HTML 4.01,
   *                              ENT_XML1       - handle code as XML 1,
   *                              ENT_XHTML      - handle code as XHTML,
   *                              ENT_HTML5      - handle code as HTML 5.
   * @param string $encoding      [optional] Encoding used in the conversion. Although this argument is
   *                              technically optional, you are highly encouraged to specify the correct
   *                              value for your code.
   * @param bool   $double_encode [optional] When turned off PHP will not encode existing html entities.
   *                              The default is to convert everything.
   *
   * @return string The encoded string. If the input contains an invalid code unit sequence within the
   *                given encoding an empty string will be returned, unless either the ENT_IGNORE or
   *                ENT_SUBSTITUTE flags are set.
   */
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
3048
3049
  /**
3050
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
3051
   *
3052
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3053
   *
3054
   * @param string $str           <p>
3055
   *                              The string being converted.
3056
   *                              </p>
3057
   * @param int    $flags         [optional] <p>
3058
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3059
   *                              invalid code unit sequences and the used document type. The default is
3060
   *                              ENT_COMPAT | ENT_HTML401.
3061
   *                              <table>
3062 1
   *                              Available <i>flags</i> constants
3063
   *                              <tr valign="top">
3064 1
   *                              <td>Constant Name</td>
3065
   *                              <td>Description</td>
3066
   *                              </tr>
3067
   *                              <tr valign="top">
3068
   *                              <td><b>ENT_COMPAT</b></td>
3069
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3070
   *                              </tr>
3071
   *                              <tr valign="top">
3072 1
   *                              <td><b>ENT_QUOTES</b></td>
3073
   *                              <td>Will convert both double and single quotes.</td>
3074 1
   *                              </tr>
3075
   *                              <tr valign="top">
3076
   *                              <td><b>ENT_NOQUOTES</b></td>
3077
   *                              <td>Will leave both double and single quotes unconverted.</td>
3078
   *                              </tr>
3079
   *                              <tr valign="top">
3080
   *                              <td><b>ENT_IGNORE</b></td>
3081
   *                              <td>
3082
   *                              Silently discard invalid code unit sequences instead of returning
3083
   *                              an empty string. Using this flag is discouraged as it
3084
   *                              may have security implications.
3085
   *                              </td>
3086
   *                              </tr>
3087
   *                              <tr valign="top">
3088
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3089
   *                              <td>
3090
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3091
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
3092
   *                              </td>
3093
   *                              </tr>
3094
   *                              <tr valign="top">
3095
   *                              <td><b>ENT_DISALLOWED</b></td>
3096
   *                              <td>
3097
   *                              Replace invalid code points for the given document type with a
3098
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3099
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3100
   *                              instance, to ensure the well-formedness of XML documents with
3101
   *                              embedded external content.
3102
   *                              </td>
3103 1
   *                              </tr>
3104
   *                              <tr valign="top">
3105 1
   *                              <td><b>ENT_HTML401</b></td>
3106
   *                              <td>
3107
   *                              Handle code as HTML 4.01.
3108
   *                              </td>
3109
   *                              </tr>
3110
   *                              <tr valign="top">
3111
   *                              <td><b>ENT_XML1</b></td>
3112
   *                              <td>
3113
   *                              Handle code as XML 1.
3114
   *                              </td>
3115 1
   *                              </tr>
3116
   *                              <tr valign="top">
3117 1
   *                              <td><b>ENT_XHTML</b></td>
3118
   *                              <td>
3119
   *                              Handle code as XHTML.
3120
   *                              </td>
3121
   *                              </tr>
3122
   *                              <tr valign="top">
3123
   *                              <td><b>ENT_HTML5</b></td>
3124
   *                              <td>
3125
   *                              Handle code as HTML 5.
3126
   *                              </td>
3127 1
   *                              </tr>
3128
   *                              </table>
3129 1
   *                              </p>
3130
   * @param string $encoding      [optional] <p>
3131
   *                              Defines encoding used in conversion.
3132
   *                              </p>
3133
   *                              <p>
3134
   *                              For the purposes of this function, the encodings
3135
   *                              ISO-8859-1, ISO-8859-15,
3136
   *                              UTF-8, cp866,
3137
   *                              cp1251, cp1252, and
3138
   *                              KOI8-R are effectively equivalent, provided the
3139
   *                              <i>string</i> itself is valid for the encoding, as
3140
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3141
   *                              the same positions in all of these encodings.
3142
   *                              </p>
3143
   * @param bool   $double_encode [optional] <p>
3144
   *                              When <i>double_encode</i> is turned off PHP will not
3145
   *                              encode existing html entities, the default is to convert everything.
3146
   *                              </p>
3147
   *
3148
   * @return string The converted string.
3149
   * </p>
3150
   * <p>
3151
   * If the input <i>string</i> contains an invalid code unit
3152
   * sequence within the given <i>encoding</i> an empty string
3153
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3154
   * <b>ENT_SUBSTITUTE</b> flags are set.
3155
   */
3156
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Pure pass-through to PHP's native htmlspecialchars(); all four arguments
    // are forwarded unchanged, with the encoding defaulting to 'UTF-8'.
    $converted = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
3160
3161
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return   bool True if the extension is loaded, False otherwise
   */
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean.
    return extension_loaded('iconv');
  }
3170
3171
  /**
   * Formats an integer as a hexadecimal code point string such as "U+0041".
   *
   * The hexadecimal part is lowercase and left-padded with zeros to at least
   * four digits.
   *
   * @param    int    $int  The integer (or digit-only string) to convert.
   * @param    string $pfix Prefix placed in front of the hex digits.
   *
   * @return   string The prefixed code point, or an empty string when $int
   *                  does not consist solely of decimal digits.
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain run of decimal digits (negative numbers,
    // floats, empty strings, ...) is rejected.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
3191
3192 4
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return   bool True if the extension is loaded, False otherwise
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean.
    return extension_loaded('intl');
  }
3201
3202
  /**
   * Reports whether the "IntlChar" class can be used.
   *
   * Both conditions must hold: Bootup::is_php('7.0') returns true and the
   * class "IntlChar" exists.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intlChar_loaded()
  {
    // "&&" instead of the low-precedence "and": the result is the same here,
    // but "&&" stays correct if the expression is ever used in an assignment.
    return Bootup::is_php('7.0') === true && class_exists('IntlChar');
  }
3211
3212 1
  /**
   * CamelCase alias of "UTF8::is_ascii()".
   *
   * @param string $str The string to check.
   *
   * @return boolean Whatever UTF8::is_ascii() returns for $str.
   */
  public static function isAscii($str)
  {
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
3223
3224
  /**
   * CamelCase alias of "UTF8::is_base64()".
   *
   * @param string $str The string to check.
   *
   * @return bool Whatever UTF8::is_base64() returns for $str.
   */
  public static function isBase64($str)
  {
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
3235 4
3236 4
  /**
   * CamelCase alias of "UTF8::is_bom()".
   *
   * @param string $utf8_chr The string to compare against the byte order mark.
   *
   * @return boolean Whatever UTF8::is_bom() returns for $utf8_chr.
   */
  public static function isBom($utf8_chr)
  {
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
3247
3248
  /**
   * Tries to check whether a string is a JSON document.
   *
   * Only JSON objects and JSON arrays are accepted; bare scalars such as
   * "12" or "\"text\"" decode without error but are reported as false.
   *
   * @param mixed $str The value to check; it is cast to string first.
   *
   * @return bool True if $str decodes without error to an object or array,
   *              False otherwise (including for the empty string).
   */
  public static function isJson($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = json_decode($str);

    if (json_last_error() !== JSON_ERROR_NONE) {
      return false;
    }

    // A top-level JSON array decodes to a PHP array, not an object, so both
    // container types have to be accepted.
    return is_object($decoded) || is_array($decoded);
  }
3273 2
3274
  /**
   * Checks whether the string contains at least one HTML-like tag,
   * e.g. "<b>", "</b>", "<br/>" or "<a href="x">".
   *
   * @param string $str The string to inspect; it is cast to string first.
   *
   * @return boolean True if a tag was matched, False otherwise (including
   *                 for the empty string).
   */
  public static function isHtml($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $found = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str, $found);

    // A successful match always fills the match array.
    return count($found) != 0;
  }
3300 1
3301 2
  /**
   * CamelCase alias of "UTF8::is_utf8()".
   *
   * @param string $str The string to check.
   *
   * @return bool Whatever UTF8::is_utf8() returns for $str.
   */
  public static function isUtf8($str)
  {
    $isUtf8 = self::is_utf8($str);

    return $isUtf8;
  }
3312 2
3313 2
  /**
   * Checks whether a string is 7 bit ASCII, i.e. contains no byte in the
   * range 0x80-0xFF.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if it is ASCII<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    // The negation already produces a boolean.
    return !preg_match('/[\x80-\xFF]/', $str);
  }
3325
3326
  /**
   * Checks whether a string is base64 encoded.
   *
   * The string qualifies when strictly decoding it and encoding the result
   * again reproduces the input exactly.
   *
   * @param string $str The string to check; it is cast to string first.
   *
   * @return bool True if $str round-trips through base64, False otherwise
   *              (including for the empty string).
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = base64_decode($str, true);

    return base64_encode($decoded) === $str;
  }
3347 2
3348 2
  /**
   * Heuristically checks whether the input is binary.
   *
   * The input is reported as binary when it consists solely of the
   * characters "0" and "1", or when it contains at least one NUL byte.
   *
   * @param string $input The data to inspect.
   *
   * @return bool True if the input looks binary, False otherwise
   */
  public static function is_binary($input)
  {
    $length = strlen($input);

    // A non-empty run of only "0"/"1" characters.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte.
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    if (!$length) {
      return false;
    }

    // NOTE(review): this counts occurrences of the literal four-byte string
    // "^ -~", not of a character class. As the needle is four bytes long the
    // ratio can never exceed 0.25, so this check never yields true.
    // Presumably "bytes outside the range ' ' to '~'" was intended; kept
    // as-is so that the behavior does not change.
    return substr_count($input, '^ -~') / $length > 0.3;
  }
3372 1
3373 1
  /**
   * Checks whether a file is binary by passing its first 512 bytes to
   * "UTF8::is_binary()".
   *
   * A file that cannot be opened or read is treated like empty content.
   *
   * @param string $file Path of the file to inspect.
   *
   * @return boolean Result of UTF8::is_binary() for the bytes read.
   */
  public static function is_binary_file($file)
  {
    $block = '';

    try {
      // fopen() and fread() signal failure by returning false (plus a
      // warning) rather than by throwing, so the results are checked
      // explicitly instead of calling fread()/fclose() on false.
      $fp = fopen($file, 'rb');

      if ($fp !== false) {
        $chunk = fread($fp, 512);
        fclose($fp);

        if (is_string($chunk)) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Kept for setups whose error handler turns warnings into exceptions.
      $block = '';
    }

    return self::is_binary($block);
  }
3392
3393
  /**
   * Checks whether the given string is exactly the value returned by
   * "UTF8::bom()" (the UTF-8 byte order mark).
   *
   * WARNING: Use "UTF8::string_has_bom()" to check for a BOM inside a string.
   *
   * @param    string $utf8_chr The input string.
   *
   * @return   bool True if $utf8_chr is identical to the BOM, False otherwise.
   */
  public static function is_bom($utf8_chr)
  {
    $bom = self::bom();

    return $utf8_chr === $bom;
  }
3406
3407
  /**
   * Checks whether the string is UTF-16.
   *
   * Only input that "UTF8::is_binary()" reports as binary is examined. For
   * each byte order the input is converted to UTF-8 and round-tripped; if the
   * round trip is stable, a score is computed from the "UTF8::count_chars()"
   * results of the input and of the converted text. The byte order with the
   * higher score wins; equal scores mean "not UTF-16".
   *
   * @param string $str The string to check.
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    $score = array();

    // Little endian is evaluated first, then big endian.
    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $score[$encoding] = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      // Encode back and decode again; the result must be stable.
      $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
      if ($roundTrip !== $decoded) {
        continue;
      }

      $strChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    if ($score['UTF-16BE'] === $score['UTF-16LE']) {
      return false;
    }

    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
3461 7
3462
  /**
   * Checks whether the string is UTF-32.
   *
   * Only input that "UTF8::is_binary()" reports as binary is examined. For
   * each byte order the input is converted to UTF-8 and round-tripped; if the
   * round trip is stable, a score is computed from the "UTF8::count_chars()"
   * results of the input and of the converted text. The byte order with the
   * higher score wins; equal scores mean "not UTF-32".
   *
   * @param string $str The string to check.
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    $score = array();

    // Little endian is evaluated first, then big endian.
    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $score[$encoding] = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      // Encode back and decode again; the result must be stable.
      $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
      if ($roundTrip !== $decoded) {
        continue;
      }

      $strChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    if ($score['UTF-32BE'] === $score['UTF-32LE']) {
      return false;
    }

    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
3516 32
3517
  /**
   * Checks whether the passed string contains only byte sequences that are
   * valid UTF-8 characters.
   *
   * Rejected are: stray continuation bytes, incomplete sequences (also at the
   * very end of the string), non-shortest forms, 5 and 6 byte sequences,
   * surrogates (U+D800..U+DFFF) and code points above U+10FFFF.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param    string $str The string to be checked; it is cast to string first.
   *
   * @return   bool True for valid UTF-8 (the empty string included), False otherwise.
   */
  public static function is_utf8($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    // NOTE(review): this branch is taken when pcre_utf8_support() is NOT
    // true, yet it relies on the /u modifier - the condition looks inverted.
    // pcre_utf8_support() is defined elsewhere; confirm before changing it.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched when the /u modifier
      // is used, then the string is valid UTF-8; with invalid UTF-8 nothing
      // at all will match.
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Expecting either a US-ASCII octet or the lead octet of a sequence.
        if (0 === (0x80 & $in)) {
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // Lead octet of a 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // Lead octet of a 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // Lead octet of a 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // Lead octet of a 5 octet sequence. Always illegal, but instead of
          // resynchronizing the sequence is consumed and rejected by the
          // "4 < $mBytes" check once it is complete.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // Lead octet of a 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal lead octet.
          return false;
        }

        continue;
      }

      // Inside a sequence: only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // Sequence complete: $mUcs4 holds the code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state means the string ended in the middle of a multi-octet
    // sequence, which is not valid UTF-8.
    return $mState === 0;
  }
3650 23
3651
  /**
3652 23
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3653
   * Decodes a JSON string
3654
   *
3655
   * @link http://php.net/manual/en/function.json-decode.php
3656
   *
3657
   * @param string $json    <p>
3658
   *                        The <i>json</i> string being decoded.
3659
   *                        </p>
3660
   *                        <p>
3661
   *                        This function only works with UTF-8 encoded strings.
3662 1
   *                        </p>
3663
   *                        <p>PHP implements a superset of
3664 1
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3665
   *                        only supports these values when they are nested inside an array or an object.
3666
   *                        </p>
3667
   * @param bool   $assoc   [optional] <p>
3668 1
   *                        When <b>TRUE</b>, returned objects will be converted into
3669
   *                        associative arrays.
3670
   *                        </p>
3671
   * @param int    $depth   [optional] <p>
3672
   *                        User specified recursion depth.
3673
   *                        </p>
3674
   * @param int    $options [optional] <p>
3675
   *                        Bitmask of JSON decode options. Currently only
3676
   *                        <b>JSON_BIGINT_AS_STRING</b>
3677
   *                        is supported (default is to cast large integers as floats)
3678
   *                        </p>
3679 1
   *
3680
   * @return mixed the value encoded in <i>json</i> in appropriate
3681 1
   * PHP type. Values true, false and
3682 1
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3683 1
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3684
   * <i>json</i> cannot be decoded or if the encoded
3685 1
   * data is deeper than the recursion limit.
3686
   */
3687
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is run through UTF8::filter() before it is decoded.
    $filtered = self::filter($json);

    // $options is only forwarded when Bootup::is_php('5.4') reports true.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3699 2
3700 2
  /**
3701
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3702 2
   * Returns the JSON representation of a value
3703
   *
3704
   * @link http://php.net/manual/en/function.json-encode.php
3705
   *
3706
   * @param mixed $value   <p>
3707
   *                       The <i>value</i> being encoded. Can be any type except
3708
   *                       a resource.
3709
   *                       </p>
3710
   *                       <p>
3711
   *                       All string data must be UTF-8 encoded.
3712 1
   *                       </p>
3713
   *                       <p>PHP implements a superset of
3714 1
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3715
   *                       only supports these values when they are nested inside an array or an object.
3716
   *                       </p>
3717
   * @param int   $options [optional] <p>
3718 1
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3719
   *                       <b>JSON_HEX_TAG</b>,
3720
   *                       <b>JSON_HEX_AMP</b>,
3721
   *                       <b>JSON_HEX_APOS</b>,
3722
   *                       <b>JSON_NUMERIC_CHECK</b>,
3723
   *                       <b>JSON_PRETTY_PRINT</b>,
3724
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3725
   *                       <b>JSON_FORCE_OBJECT</b>,
3726
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3727
   *                       constants is described on
3728 13
   *                       the JSON constants page.
3729
   *                       </p>
3730 13
   * @param int   $depth   [optional] <p>
3731
   *                       Set the maximum depth. Must be greater than zero.
3732 13
   *                       </p>
3733
   *
3734
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3735 13
   */
3736 13
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is run through UTF8::filter() before it is encoded.
    $filtered = self::filter($value);

    // $depth is only forwarded when Bootup::is_php('5.5') reports a truthy value.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3748 13
3749
  /**
   * Makes the first character of a string lowercase.
   *
   * The first character is taken with UTF8::substr() and lowercased with
   * UTF8::strtolower(); the remainder is appended unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function lcfirst($str)
  {
    $head = self::strtolower(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
3760
3761
  /**
   * Strips whitespace or other characters from the beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than "ltrim()" !!!!
   *
   * @param    string $str   The string to be trimmed; it is cast to string first.
   * @param    string $chars Optional characters to be stripped; when omitted
   *                         (INF), whitespace (\s) is stripped.
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INF is the "no character list given" sentinel; otherwise the list is
    // turned into a regex fragment by UTF8::rxClass().
    if (INF === $chars) {
      $pattern = '\s';
    } else {
      $pattern = self::rxClass($chars);
    }

    return preg_replace('/^' . $pattern . '+/u', '', $str);
  }
3783
3784
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is concatenated into one string first.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point.
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    return self::chr(max($codePoints));
  }
3799 1
3800 1
  /**
   * Returns the largest byte width among the characters of the given string.
   *
   * The per-character byte sizes are obtained from "chr_size_list()".
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest byte size reported for any character, or 0 when no sizes are reported.
   */
  public static function max_chr_width($str)
  {
    $byteSizes = self::chr_size_list($str);

    return count($byteSizes) > 0 ? (int)max($byteSizes) : 0;
  }
3817
3818
  /**
   * Checks whether the "mbstring" extension is loaded.
   *
   * Side effect: when the extension is available, its internal encoding is switched to UTF-8.
   *
   * @return   bool True if available, false otherwise.
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3833
3834
  /**
   * Returns the UTF-8 character with the lowest code point found in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *                      (an array is concatenated before it is inspected).
   *
   * @return   string The character that has the lowest code point.
   */
  public static function min($arg)
  {
    $data = is_array($arg) ? implode($arg) : $arg;

    $codePoints = self::codepoints($data);

    return self::chr(min($codePoints));
  }
3849
3850
  /**
   * Normalize an encoding name to its canonical spelling.
   *
   * Names already listed in self::$iconvEncoding are returned as they are. Everything else is
   * upper-cased and, if its alphanumeric form matches a known alias, mapped to the canonical name.
   * Results are memoized per input value.
   *
   * @param string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc. (falsy input is returned unchanged)
   */
  public static function normalizeEncoding($encoding)
  {
    static $resolved = array();

    // falsy input is handed back untouched
    if (!$encoding) {
      return $encoding;
    }

    // already a known iconv encoding name
    if (in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($resolved[$encoding])) {
      return $resolved[$encoding];
    }

    // alias (upper-case, alphanumeric only) => canonical name
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $normalized = strtoupper($encoding);
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $normalized);

    if (!empty($aliases[$aliasKey])) {
      $normalized = $aliases[$aliasKey];
    }

    $resolved[$encoding] = $normalized;

    return $normalized;
  }
3902 1
3903
  /**
   * Normalize MS Word special characters.
   *
   * Every key of self::$utf8MSWord found in the input is replaced by its mapped value.
   * The key/value lists are extracted once and reused on later calls.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    static $search = null;
    static $replacements = null;

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replacements = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replacements, $str);
  }
3922
3923
  /**
   * Normalize the whitespace.
   *
   * Every character listed in self::$whitespaceTable is replaced by a plain space and, unless
   * requested otherwise, every character listed in self::$bidiUniCodeControlsTable is removed.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // The cache slot must be derived from the very same strict test that decides whether the
    // non-breaking space is kept. Otherwise a truthy non-bool argument (e.g. 1) would store the
    // full table in the "keep" slot and every later call with true would reuse that wrong table.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($whitespaces[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3961 1
3962 1
  /**
   * Format a number with grouped thousands.
   *
   * When both separators are longer than one byte (and Bootup::is_php('5.4') reports true) the
   * number is first formatted with the plain "." / "," separators, which are then swapped for
   * the requested ones. In every other case the native number_format() is called directly.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // NOTE(review): the condition requires BOTH separators to be multi-byte; it is kept exactly
    // as it was - confirm whether "either one" was intended.
    if (
        isset($thousands_sep[1], $dec_point[1])
        &&
        Bootup::is_php('5.4') === true
    ) {
      // strtr() substitutes both placeholders in a single pass. A sequential str_replace() would
      // re-replace any "," that the decimal-point replacement has just inserted.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3997 3
3998
  /**
   * Calculates the Unicode code point of the given UTF-8 encoded character.
   *
   * \IntlChar::ord() is used when the "intlChar" support flag is set and it yields a truthy
   * result; otherwise the code point is computed from (at most) the first four bytes.
   *
   * @param    string $s The character of which to calculate the code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for empty input.
   */
  public static function ord($s)
  {
    // empty input - but the character "0" is a valid one
    if (!$s && $s !== '0') {
      return 0;
    }

    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $codePoint = \IntlChar::ord($s);
      if ($codePoint) {
        return $codePoint;
      }
    }

    // manual decoding: unpack() yields a 1-based list of byte values
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // four-byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three-byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two-byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // anything else: the value of the first byte
    return $lead;
  }
4039
4040
  /**
   * Parses the string into variables.
   *
   * WARNING: This differs from parse_str() by returning the results
   *    in the given array instead of placing them in the local scope!
   *
   * The input is passed through "filter()" before it is handed to \mb_parse_str().
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string.
   *                        </p>
   * @param array  $result  <p>
   *                        Receives the parsed variables as array elements (passed by reference).
   *                        </p>
   *
   * @return void
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    $filtered = self::filter($str);

    \mb_parse_str($filtered, $result);
  }
4067 36
4068 36
  /**
   * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * @return   bool True if support is available, false otherwise.
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier only matches when the modifier is supported;
    // warnings are suppressed on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
4078
4079
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Both boundaries are resolved to code points first; an empty array is returned as soon as
   * one of them is falsy or resolves to 0.
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters for every code point between both boundaries (inclusive).
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $start = self::rangeBoundToCodePoint($var1);
    if (!$start) {
      return array();
    }

    $end = self::rangeBoundToCodePoint($var2);
    if (!$end) {
      return array();
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Resolve one boundary of "range()" to a code point.
   *
   * Decimal digits win over hexadecimal digits, which win over "take the code point of the character".
   *
   * @param    mixed $bound Numeric or hexadecimal code point, or a UTF-8 character.
   *
   * @return   int The resolved code point.
   */
  private static function rangeBoundToCodePoint($bound)
  {
    if (ctype_digit((string)$bound)) {
      return (int)$bound;
    }

    if (ctype_xdigit($bound)) {
      return (int)self::hex_to_int($bound);
    }

    return self::ord($bound);
  }
4125
4126
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Besides the raw byte-order marks, the variants that appear when a BOM was mis-read as
   * "WINDOWS-1252" and re-encoded as UTF-8 are recognized as well. Only one leading BOM
   * (the first signature that matches) is removed.
   *
   * @param string $str
   *
   * @return string The input without its leading BOM.
   */
  public static function removeBOM($str = '')
  {
    // INFO: https://en.wikipedia.org/wiki/Byte_order_mark
    //
    // All signatures are written as byte escapes so they do not depend on the encoding of this
    // source file (and cannot silently degrade to an empty needle, which would match every string).
    // The order matters: the UTF-32 (LE) BOM starts with the UTF-16 (LE) BOM.
    $signatures = array(
        "\xef\xbb\xbf",             // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf", // UTF-8 BOM as "WINDOWS-1252" (three chars, six bytes)
        "\x00\x00\xfe\xff",         // UTF-32 (BE) BOM
        "\xff\xfe\x00\x00",         // UTF-32 (LE) BOM
        "\xfe\xff",                 // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf",         // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe",                 // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe",         // UTF-16 (LE) BOM as "WINDOWS-1252"
    );

    foreach ($signatures as $bom) {
      $bomLength = strlen($bom);

      if (strncmp($str, $bom, $bomLength) === 0) {
        // cast: older PHP versions return false when nothing is left after the BOM
        return (string)substr($str, $bomLength);
      }
    }

    return $str;
  }
4157
4158
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of two or more consecutive repetitions of a search string is collapsed
   * into a single occurrence.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String (or list of strings) to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        $pattern = '/(' . preg_quote($item, '/') . ')+/';

        // The replacement has to be the literal item: escape "\" and "$" so that sequences
        // such as "$0" or "\1" are not interpreted as back-references by preg_replace().
        $replacement = addcslashes($item, '\\$');

        $str = preg_replace($pattern, $replacement, $str);
      }
    }

    return $str;
  }
4180
4181 2
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * Adapted from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str         The string to clean.
   * @param  bool   $url_encoded Also remove the URL-encoded form (e.g. "%0b") of the control characters.
   * @param  string $replacement The text that is put in place of each removed sequence.
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding may use upper- or lower-case hex digits, so both must be matched
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left: a removal can bring the pieces of a new sequence together
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4215
4216
  /**
   * Replace the diamond question mark (�), i.e. the Unicode replacement character.
   *
   * @param string $str     The string to process.
   * @param string $unknown The text that is put in place of each replacement character.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the same character, once as byte escape and once as literal
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );

    $substitutes = array(
        $unknown,
        $unknown,
    );

    return str_replace($needles, $substitutes, $str);
  }
4238
4239
  /**
   * Strip whitespace (or a custom set of characters) from the end of a UTF-8 string.
   *
   * WARNING: This is much slower than the native "rtrim()".
   *
   * @param    string $str   The string to be trimmed.
   * @param    string $chars Optional characters to be stripped. When left at the INF default,
   *                         the regex class "\s" is used.
   *
   * @return   string The string with the unwanted characters stripped from the right.
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    // nothing to trim in an empty string
    if (!isset($str[0])) {
      return '';
    }

    if (INF === $chars) {
      $charClass = '\s';
    } else {
      $charClass = self::rxClass($chars);
    }

    return preg_replace('/' . $charClass . '+$/u', '', $str);
  }
4261
4262 24
  /**
   * Build a regular-expression fragment that matches any one of the characters of a string.
   *
   * The input is split via "str_split()". A "-" is moved to the front of the bracket class,
   * chunks of at most two bytes are quoted for use between "/" delimiters, longer chunks that
   * count as a single character are appended unquoted, and every remaining chunk becomes its own
   * alternative next to the bracket class. Results are memoized per ($s . $class).
   *
   * @param string $s     The characters the fragment has to match.
   * @param string $class Initial content of the bracket class.
   *
   * @return string Either "[...]" or "(?:[...]|alt|...)".
   */
  protected static function rxClass($s, $class = '')
  {
    static $built = array();

    $cacheKey = $s . $class;

    if (isset($built[$cacheKey])) {
      return $built[$cacheKey];
    }

    // index 0 collects the bracket class, all further entries are stand-alone alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $chunk) {
      if ($chunk === '-') {
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($chunk[2])) {
        $parts[0] .= preg_quote($chunk, '/');
      } elseif (self::strlen($chunk) === 1) {
        $parts[0] .= $chunk;
      } else {
        $parts[] = $chunk;
      }
    }

    $parts[0] = '[' . $parts[0] . ']';

    $regex = count($parts) === 1 ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $built[$cacheKey] = $regex;

    return $regex;
  }
4307
4308
  /**
   * Echo every entry of the native UTF8-support table, e.g. for debugging.
   *
   * Each value of self::$support is printed followed by a newline and a "<br>".
   */
  public static function showSupport()
  {
    $lineEnd = "\n<br>";

    foreach (self::$support as $supportValue) {
      echo $supportValue . $lineEnd;
    }
  }
4317 3
4318
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr            The Unicode character to be encoded as numbered entity.
   * @param    bool   $keepAsciiChars Keep ASCII chars (they are returned unchanged).
   *
   * @return   string The HTML numbered entity, or an empty string for empty input.
   */
  public static function single_chr_html_encode($chr, $keepAsciiChars = false)
  {
    // empty input - but the character "0" is falsy in PHP and still a valid character
    // (same guard as in "ord()")
    if (!$chr && $chr !== '0') {
      return '';
    }

    if ($keepAsciiChars === true && self::isAscii($chr) === true) {
      return $chr;
    }

    return '&#' . self::ord($chr) . ';';
  }
4340
4341
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the characters are collected via a regex; otherwise the bytes are
   * walked manually and grouped by their lead/continuation bit patterns (bytes that do not form
   * a complete sequence are left out).
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string (applied on the regex path).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: group the bytes by hand

      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // 0xxxxxxx: single byte
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // 110xxxxx: lead byte of a two-byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // 1110xxxx: lead byte of a three-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // 11110xxx: lead byte of a four-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    // merge the single characters into chunks of the requested length
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4419
4420
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally an iconv() round-trip per known encoding.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) plain ASCII
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) UTF-8
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    self::checkForSupport();

    $candidates = array(
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
        'UTF-8',
    );

    $detected = \mb_detect_encoding($str, $candidates, true);
    if ($detected) {
      return $detected;
    }

    //
    // 5.) "iconv()": an encoding fits when converting the string onto itself leaves it unchanged
    //

    $originalHash = md5($str);
    foreach (self::$iconvEncoding as $candidate) {
      // INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @iconv($candidate, $candidate, $str);

      if (md5($roundTrip) === $originalHash) {
        return $candidate;
      }
    }

    return false;
  }
4497
4498
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value (or list of values) to search for. Every replacement
   *                       with a search array is performed on the result of the previous
   *                       replacement. An empty search value never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement (or list of replacements); it is inserted literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed a string or an array of replacements.
   * @since 5.0
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // turn every needle into a case-insensitive, unicode-aware pattern
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle .= '';

      if ('' === $needle) {
        // a pattern that can never match
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // str_ireplace() inserts the replacement literally: escape "\" and "$" so that preg_replace()
    // does not interpret sequences like "$1" or "\0" as back-references
    if (is_array($replace)) {
      $replacements = array();
      foreach ($replace as $replacement) {
        $replacements[] = addcslashes((string)$replacement, '\\$');
      }
    } else {
      $replacements = addcslashes((string)$replace, '\\$');
    }

    $replaced = 0;
    $subject = preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
4542 1
4543
  /**
   * Limit the number of characters in a string without cutting through the last word.
   *
   * A string that fits into $length is returned unchanged. Otherwise it is cut at $length and
   * everything from the last space on is dropped before the add-on is appended. When no text is
   * left that way, the hard cut minus one character is used instead.
   *
   * @param  string $str      The string to shorten.
   * @param  int    $length   The maximum number of characters.
   * @param  string $strAddOn The text appended to a shortened string.
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // short enough already
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls right behind a word: drop that space and be done
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $cut = self::substr($str, 0, $length);

    // keep everything in front of the last space (nothing, if there is no space at all)
    $lastSpace = strrpos($cut, ' ');
    $head = ($lastSpace === false) ? '' : substr($cut, 0, $lastSpace);

    if ($head === '') {
      return self::substr($cut, 0, $length - 1) . $strAddOn;
    }

    return $head . $strAddOn;
  }
4583 16
4584 16
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when $pad_length is not a positive integer, when it is
   * smaller than the length of the input, or when the padding string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (is_int($pad_length) && ($pad_length > 0) && ($pad_length >= $input_length)) {
      $ps_length = self::strlen($pad_string);

      // nothing to pad with - this also avoids the division by zero below
      if ($ps_length < 1) {
        return $input;
      }

      $diff = $pad_length - $input_length;

      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // the left side gets the smaller half, the right side the bigger one;
          // the division has to happen before the cast, so that an integer length is passed on
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $pre = self::substr($pre, 0, (int)floor($diff / 2));
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $post = self::substr($post, 0, (int)ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $input . $post;
    }

    return $input;
  }
4629 1
4630 1
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is handed to the native str_repeat().
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated;
   *                           passed unchanged to the native str_repeat().
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return str_repeat(self::filter($input), $multiplier);
  }
4654 8
4655
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: this is only a wrapper for the native "str_replace()", all arguments are passed through unchanged.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for (the needle).
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value for the found search values.
   *                       An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on (the haystack).
   *                       If it is an array, every entry is processed and an array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4689
4690 3
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split with self::split(), the resulting pieces are shuffled and glued together again.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);

    // shuffle() works in place
    shuffle($chars);

    return implode('', $chars);
  }
4705
4706
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    // the keys are irrelevant for the result, only the order of the values matters
    if ($desc) {
      rsort($codePoints);
    } else {
      sort($codePoints);
    }

    return self::string($codePoints);
  }
4731 2
4732 1
  /**
   * Convert a string to an array.
   *
   * The string is split into grapheme clusters; for $len > 1 every $len consecutive clusters are joined
   * into one array element.
   *
   * @param string $str
   * @param int    $len number of grapheme clusters per array element
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    if ($len < 1) {
      // invalid length: delegate to the native function and its error handling
      return str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $bytePos = 0;
      $byteLength = strlen($str);
      while ($bytePos < $byteLength) {
        // the last argument is filled with the byte position where the next extraction starts
        $graphemes[] = \grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $bytePos, $bytePos);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $graphemes = $matches[0];
    }

    if ($len === 1) {
      return $graphemes;
    }

    // glue every $len consecutive grapheme clusters together
    $chunks = array();
    foreach (array_chunk($graphemes, $len) as $group) {
      $chunks[] = implode('', $group);
    }

    return $chunks;
  }
4780
4781
  /**
   * Get a binary representation of a specific character.
   *
   * The input is processed byte by byte: every byte is passed to self::ord() and the result is
   * formatted as an 8-digit, zero-padded binary number.
   *
   * @param   string $str The input character.
   *
   * @return  string the concatenated binary digits, or an empty string for empty input
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $bits = '';
    foreach (str_split($str) as $byte) {
      $bits .= vsprintf('%08b', (array)self::ord($byte));
    }

    return $bits;
  }
4807 8
4808 5
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   *
   * The input is cleaned via self::clean() and split into single characters. ASCII characters are kept,
   * every other character is decoded to its code point and replaced by the entry of the matching
   * lookup bank ("data/xNN.php", where NN is the code point shifted right by 8 bits). Banks are loaded on
   * demand and kept in a static variable, so they persist across calls.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @author <[email protected]>
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    // already loaded lookup banks: bank number => (low byte of code point => replacement)
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      // reset the code point for every character, otherwise the "!isset($ord)" check below
      // would see the value of the previous character
      $ord = null;

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      $ordC1 = ord($c[1]);

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = ord($c[2]);

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = ord($c[3]);

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = ord($c[4]);

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = ord($c[5]);

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 0xFE and 0xFF are not handled as lead bytes by the decoding above
      if ($ordC0 >= 254 && $ordC0 <= 255) {
        $c = $unknown;
        continue;
      }

      // no branch above was able to decode the character
      if (!isset($ord)) {
        $c = $unknown;
        continue;
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the bank file is presumably expected to fill $UTF8_TO_ASCII[$bank] - confirm
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference to the last element
    unset($c);

    return implode('', $chars);
  }
4918
4919
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split with a regular expression which captures the words, so the words are found at
   * the odd indexes of the split result.
   *
   * @param string $str    The input string.
   * @param int    $format <strong>0</strong> => return a number of words<br />
   *                       <strong>1</strong> => return an array of words
   *                       <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist
   *
   * @return array|float|int The number of words in the string, or the words themselves (see $format)
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $parts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $partCount = count($parts);

    if ($format === 1) {
      // plain list of words
      $words = array();
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      self::checkForSupport();

      // words, keyed by the offset at which they start
      $words = array();
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$offset] = $parts[$i];
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($partCount - 1) / 2;
  }
4963
4964
  /**
   * Case-insensitive string comparison.
   *
   * Both strings are passed through self::strtocasefold() and then compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4976
4977
  /**
   * String comparison.
   *
   * Identical strings are reported as equal right away; otherwise both strings are normalized
   * to NFD and compared with the native strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4994 65
4995
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str
   * @param string $charList the characters which end the segment
   * @param int    $offset   start of the part of $str which is examined
   * @param int    $length   length of the part of $str which is examined
   *
   * @return int|null the length of the segment before the first character from $charList (or the length
   *                  of the examined string if there is none), null if $charList is empty
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;

    if ($charList === '') {
      return null;
    }

    // only cut the string if a non-default offset / length was given
    $str = ($offset || $length !== 2147483647)
        ? (string)self::substr($str, $offset, $length)
        : (string)$str;

    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      // everything in front of the first character from the list
      return self::strlen($matches[1]);
    }

    return self::strlen($str);
  }
5024
5025
  /**
   * Makes a UTF-8 string from code points.
   *
   * Every entry of the array is converted with self::chr() and the results are concatenated.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $utf8 = '';

    foreach ($array as $codePoint) {
      $utf8 .= self::chr($codePoint);
    }

    return $utf8;
  }
5044
5045
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * Only the first three bytes of the string are handed to self::is_bom().
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $firstBytes = substr($str, 0, 3);

    return self::is_bom($firstBytes);
  }
5056
5057
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The string is cleaned with self::clean() before the native strip_tags() is applied.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags which should not be stripped; passed unchanged to the native
   *                               strip_tags().
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 first, then strip
    return strip_tags(self::clean($str), $allowable_tags);
  }
5083
5084
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching, null is handled as 0
   *                           </p>
   * @param string  $encoding
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false Return the numeric position of the first occurrence of
   *                   needle in the haystack string, or false if needle is not found
   *                   (also if haystack or needle is empty).
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // \mb_stripos expects an integer: passing the default "null" is deprecated in newer PHP versions
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5131
5132
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * Thin wrapper around \mb_stristr() with a fixed "UTF-8" encoding.
   *
   * @param string $str
   * @param string $needle
   * @param bool   $before_needle passed unchanged to \mb_stristr()
   *
   * @return false|string false if $needle is empty, otherwise the result of \mb_stristr()
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    return \mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
5152
5153
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Set the charset for e.g. "\mb_" function
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied for "UTF-8")
   *
   * @return int the number of characters in
   *           string str having character encoding
   *           encoding. A multi-byte character is
   *           counted as 1.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    $encoding = self::normalizeEncoding($encoding);

    // for these encodings the byte count is returned directly
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        return strlen($str);
    }

    self::checkForSupport();

    $cleanBeforeCount = ($encoding === 'UTF-8' && $cleanUtf8 === true);
    if ($cleanBeforeCount) {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5196
5197
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are passed through self::strtocasefold() and then compared with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>< 0</strong> if str1 is less than str2<br />
   *             <strong>> 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5211
5212
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Identical strings are reported as equal right away; otherwise both strings are passed through
   * self::strtonatfold() and compared with the native strnatcmp().
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   *
   * @return int &lt; 0 if str1 is less than str2; &gt; 0 if str1 is greater than
   *             str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5235 1
5236
  /**
   * Case-insensitive string comparison of the first n characters
   *
   * Both strings are passed through self::strtocasefold() and then compared with self::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     The length of strings to be used in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i> is
   * greater than <i>str2</i>, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5261
5262
  /**
   * String comparison of the first n characters
   *
   * Both strings are cut with self::substr() and the results are compared with self::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     Number of characters to use in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i>
   * is greater than <i>str2</i>, and 0 if they are
   * equal.
   */
  public static function strncmp($str1, $str2, $len)
  {
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5288 9
5289
  /**
   * Search a string for any of a set of characters
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>
   *                          The string where char_list is looked for.
   *                          </p>
   * @param string $char_list <p>
   *                          This parameter is case sensitive.
   *                          </p>
   *
   * @return string|false a string starting from the character found, or false if it is
   * not found (also if haystack or char_list is empty).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';

    if (!preg_match($pattern, $haystack, $match, PREG_OFFSET_CAPTURE)) {
      return false;
    }

    // $match[0][1] is the byte offset of the first character which was found
    return substr($haystack, $match[0][1]);
  }
5320
5321
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack     <p>
   *                              The string being checked.
   *                              </p>
   * @param string  $needle       <p>
   *                              The string to find in haystack. It is always handled as a string,
   *                              an integer is not converted into a character.
   *                              </p>
   * @param int     $offset       [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string  $encoding
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    // INFO: the former "integer needle => character" conversion was removed: $needle is already
    // a string at this point, so that strict type check could never be true

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): the "iconv" flag guards a grapheme_* call - confirm that this is intended
    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with older php versions (before 5.5.35 / 5.6.21 / 7.0.6)
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback: search byte-wise and convert the byte position into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5397
5398 10
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around \mb_strrchr(), all arguments are passed through unchanged.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack is returned:
   *                         true => everything from the beginning to the last occurrence of needle,
   *                         false => everything from the last occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use.
   *                         </p>
   *
   * @return string|false the portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = \mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5432
5433 18
  /**
   * Reverses characters order in the string.
   *
   * The string is split with self::split(), the pieces are reversed and glued together again.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode($reversed);
  }
5444
5445 3
  /**
5446
   * Finds the last occurrence of a character in a string within another, case insensitive.
5447 3
   *
5448
   * @link http://php.net/manual/en/function.mb-strrichr.php
5449
   *
5450
   * @param string $haystack <p>
5451
   *                         The string from which to get the last occurrence
5452
   *                         of needle
5453
   *                         </p>
5454
   * @param string $needle   <p>
5455
   *                         The string to find in haystack
5456
   *                         </p>
5457
   * @param bool   $part     [optional] <p>
5458
   *                         Determines which portion of haystack
5459
   *                         this function returns.
5460
   *                         If set to true, it returns all of haystack
5461
   *                         from the beginning to the last occurrence of needle.
5462 16
   *                         If set to false, it returns all of haystack
5463
   *                         from the last occurrence of needle to the end,
5464 16
   *                         </p>
5465
   * @param string $encoding [optional] <p>
5466 16
   *                         Character encoding name to use.
5467 4
   *                         If it is omitted, internal character encoding is used.
5468
   *                         </p>
5469
   *
5470
   * @return string the portion of haystack.
5471 15
   * or false if needle is not found.
5472
   */
5473 15
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // the class-level support check always runs before the mb_* call
    self::checkForSupport();

    // case-insensitive counterpart of strrchr(): arguments are forwarded unchanged
    $portion = \mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5479
5480
  /**
5481
   * Find position of last occurrence of a case-insensitive string.
5482
   *
5483
   * @param    string $haystack The string to look in
5484
   * @param    string $needle   The string to look for
5485
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
5486
   *
5487
   * @return   int The position of offset
5488
   */
5489
  public static function strripos($haystack, $needle, $offset = 0)
  {
    // lowercase both sides, then reuse the case-sensitive search
    $haystackLower = self::strtolower($haystack);
    $needleLower = self::strtolower($needle);

    return self::strrpos($haystackLower, $needleLower, $offset);
  }
5493
5494
  /**
5495
   * Find position of last occurrence of a string in a string.
5496
   *
5497
   * @link http://php.net/manual/en/function.mb-strrpos.php
5498
   *
5499
   * @param string     $haystack  <p>
5500
   *                              The string being checked, for the last occurrence
5501
   *                              of needle
5502
   *                              </p>
5503 1
   * @param string|int $needle    <p>
5504
   *                              The string to find in haystack.
5505 1
   *                              Or a code point as int.
5506
   *                              </p>
5507
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters into
5508
   *                              the string. Negative values will stop searching at an arbitrary point
5509
   *                              prior to the end of the string.
5510
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
5511
   *
5512
   * @return int the numeric position of
5513
   * the last occurrence of needle in the
5514
   * haystack string. If
5515
   * needle is not found, it returns false.
5516
   */
5517
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    // Returns the character position of the last occurrence of $needle in $haystack,
    // or false when there is none (or when either string is empty).

    $haystack = (string)$haystack;

    // a non-negative integer needle is a code point and is turned into its character;
    // is_int() gives the same answer as the former "(int)$needle === $needle" test but
    // does not raise a conversion notice when the needle is an object
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: cut the haystack according to the offset, search byte-wise, then
    // translate the byte position into a character position

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // characters in front of the match = character position inside the cut haystack;
    // a positive offset was cut off at the front and has to be added back
    $left = substr($haystack, 0, $bytePos);

    return ($offset > 0 ? $offset : 0) + self::strlen($left);
  }
5569 37
5570
  /**
5571
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5572
   * mask.
5573 1
   *
5574 1
   * @param string $str
5575
   * @param string $mask
5576 37
   * @param int    $offset
5577 22
   * @param int    $length
5578 22
   *
5579 33
   * @return int|null
5580
   */
5581
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // only cut the subject when the caller moved away from the defaults
    $needsCut = $offset || 2147483647 !== $length;
    if ($needsCut) {
      $str = self::substr($str, $offset, $length);
    }

    // leading run of characters from the mask, anchored at the start of the subject
    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5589 37
5590
  /**
5591
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5592
   *
5593
   * @link http://php.net/manual/en/function.grapheme-strstr.php
5594
   *
5595
   * @param string $haystack      <p>
5596
   *                              The input string. Must be valid UTF-8.
5597
   *                              </p>
5598
   * @param string $needle        <p>
5599
   *                              The string to look for. Must be valid UTF-8.
5600
   *                              </p>
5601
   * @param bool   $before_needle [optional] <p>
5602
   *                              If <b>TRUE</b>, grapheme_strstr() returns the part of the
5603
   *                              haystack before the first occurrence of the needle (excluding the needle).
5604
   *                              </p>
5605
   *
5606
   * @return string the portion of string, or FALSE if needle is not found.
5607
   */
5608
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // the class-level support check always runs before the grapheme_* call
    self::checkForSupport();

    // arguments are forwarded unchanged; the result is returned as delivered
    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5614
5615
  /**
5616
   * Unicode transformation for case-less matching.
5617
   *
5618 1
   * @link http://unicode.org/reports/tr21/tr21-5.html
5619
   *
5620 1
   * @param string $str
5621 1
   * @param bool   $full
5622
   *
5623 1
   * @return string
5624
   */
5625
  public static function strtocasefold($str, $full = true)
  {
    // lookup tables are built once per request and kept in static locals
    static $foldKeys = null;
    static $foldValues = null;
    static $fullFoldTable = null;

    if (null === $foldKeys) {
      $foldKeys = array_keys(self::$commonCaseFold);
      $foldValues = array_values(self::$commonCaseFold);
    }

    // step 1: the common case-folding pairs
    $folded = str_replace($foldKeys, $foldValues, $str);

    // step 2 (optional): the full case-folding table, loaded lazily via getData()
    if ($full) {
      if (null === $fullFoldTable) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    // step 3: clean the result, then lowercase it
    $cleaned = self::clean($folded);

    return self::strtolower($cleaned);
  }
5652
5653
  /**
5654
   * (PHP 4 &gt;= 4.3.0, PHP 5)<br/>
5655
   * Make a string lowercase.
5656
   *
5657
   * @link http://php.net/manual/en/function.mb-strtolower.php
5658
   *
5659
   * @param string $str <p>
5660
   *                    The string being lowercased.
5661
   *                    </p>
5662
   * @param string $encoding
5663
   *
5664
   * @return string str with all alphabetic characters converted to lowercase.
5665 6
   */
5666
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // empty input: nothing to convert
    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    $lower = \mb_strtolower($str, $encoding);

    return $lower;
  }
5679 1
5680 1
  /**
5681 1
   * Generic case sensitive transformation for collation matching.
5682 1
   *
5683 1
   * @param string $s
5684 1
   *
5685 1
   * @return string
5686 1
   */
5687
  protected static function strtonatfold($s)
  {
    // normalize to form NFD first ...
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    // ... then remove every run of non-spacing marks (\p{Mn})
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5691 1
5692 1
  /**
5693 1
   * Make a string uppercase.
5694 1
   *
5695 1
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5696 1
   *
5697 1
   * @param string $str <p>
5698
   *                    The string being uppercased.
5699
   *                    </p>
5700 1
   * @param string $encoding
5701 1
   *
5702 1
   * @return string str with all alphabetic characters converted to uppercase.
5703 1
   */
5704
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // replacement tables for the non-mbstring path, built on first use
    static $upperKeys = null;
    static $upperValues = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, $encoding);
    }

    // fallback: replace via the class's case table

    if (null === $upperKeys) {
      $caseTable = self::case_table();
      $upperKeys = array_keys($caseTable);
      $upperValues = array_values($caseTable);
    }

    $cleaned = self::clean($str);

    return str_replace($upperKeys, $upperValues, $cleaned);
  }
5735
5736
  /**
5737
   * Translate characters or replace sub-strings.
5738
   *
5739
   * @link  http://php.net/manual/en/function.strtr.php
5740 1
   *
5741
   * @param string       $str  <p>
5742 1
   *                           The string being translated.
5743
   *                           </p>
5744 1
   * @param string|array $from <p>
5745 1
   *                           The string replacing from.
5746
   *                           </p>
5747
   * @param string|array $to   <p>
5748 1
   *                           The string being translated to.
5749
   *                           </p>
5750 1
   *
5751 1
   * @return string This function returns a copy of str,
5752
   * translating all occurrences of each character in
5753 1
   * from to the corresponding character in
5754
   * to.
5755 1
   * @since 4.0
5756 1
   * @since 5.0
5757
   */
5758 1
  public static function strtr($str, $from, $to = INF)
  {
    // Two calling conventions, told apart by the INF sentinel default of $to:
    //  - strtr($str, $pairs):     $from is already a "search => replace" map
    //  - strtr($str, $from, $to): character lists that are paired up position by position

    if (INF !== $to) {
      // strings are split into their characters; arrays are used as they are
      // (they used to be handed to str_split(), which is documented for strings only)
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      // array_combine() needs lists of equal size: the longer list is cut down
      $countFrom = count($from);
      $countTo = count($to);

      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    // NOTE(review): when $to is omitted $from is passed on untouched, so it is
    // expected to be an array here - confirm callers never pass a plain string
    return strtr($str, $from);
  }
5777 6
5778
  /**
5779
   * Return the width of a string.
5780
   *
5781
   * @param string $s
5782
   *
5783
   * @return int
5784
   */
5785
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    // the width is always measured as UTF-8
    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5792
5793
  /**
5794
   * Get part of a string.
5795
   *
5796
   * @link http://php.net/manual/en/function.mb-substr.php
5797
   *
5798
   * @param string  $str       <p>
5799
   *                           The string being checked.
5800
   *                           </p>
5801
   * @param int     $start     <p>
5802
   *                           The first position used in str.
5803
   *                           </p>
5804
   * @param int     $length    [optional] <p>
5805
   *                           The maximum length of the returned string.
5806
   *                           </p>
5807
   * @param string  $encoding
5808
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
5809
   *
5810
   * @return string mb_substr returns the portion of
5811
   * str specified by the start and length parameters.
5812 7
   */
5813
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // no length given: use the length of the whole string
    $length = ($length === null) ? (int)self::strlen($str) : (int)$length;

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      // (a boolean in the encoding slot is replaced by 'UTF-8')
      if (is_bool($encoding)) {
        $encoding = 'UTF-8';
      }

      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback: split into single characters, take the requested slice, join again
    $chars = self::split($str);
    $slice = array_slice($chars, $start, $length);

    return implode($slice);
  }
5859 1
5860 1
  /**
5861 1
   * Binary safe comparison of two strings from an offset, up to length characters.
5862
   *
5863 1
   * @param string  $main_str           The main string being compared.
5864
   * @param string  $str                The secondary string being compared.
5865
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
5866 1
   *                                    end of the string.
5867
   * @param int     $length             The length of the comparison. The default value is the largest of the length of
5868
   *                                    the str compared to the length of main_str less the offset.
5869 1
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
5870
   *
5871 3
   * @return int
5872 1
   */
5873 1
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // left side: the requested window of the main string
    $left = self::substr($main_str, $offset, $length);

    // right side: the secondary string, cut to the same number of characters
    $right = self::substr($str, 0, self::strlen($left));

    if ($case_insensitivity === true) {
      return self::strcasecmp($left, $right);
    }

    return self::strcmp($left, $right);
  }
5880
5881 6
  /**
5882
   * Count the number of substring occurrences
5883
   *
5884
   * @link  http://php.net/manual/en/function.substr-count.php
5885
   *
5886
   * @param string $haystack <p>
5887
   *                         The string to search in
5888
   *                         </p>
5889
   * @param string $needle   <p>
5890
   *                         The substring to search for
5891
   *                         </p>
5892
   * @param int    $offset   [optional] <p>
5893
   *                         The offset where to start counting
5894
   *                         </p>
5895
   * @param int    $length   [optional] <p>
5896
   *                         The maximum length after the specified offset to search for the
5897
   *                         substring. It outputs a warning if the offset plus the length is
5898
   *                         greater than the haystack length.
5899
   *                         </p>
5900
   *
5901
   * @return int This functions returns an integer.
5902
   * @since 4.0
5903 2
   * @since 5.0
5904
   */
5905 2
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    // Counts how often $needle occurs in $haystack, optionally restricted to the
    // window described by $offset / $length (both in characters).

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return 0;
    }

    // the window is applied when an offset or an explicit length was given; length is
    // compared against null so that an explicit 0 is honoured (empty window => 0 matches)
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      // null has to stay null: substr() reads it as "up to the end of the string".
      // Casting it to 0 (as before) emptied the haystack whenever only an offset
      // was passed, so the result was always 0.
      if ($length !== null) {
        $length = (int)$length;
      }

      $haystack = self::substr($haystack, $offset, $length);
    }

    self::checkForSupport();

    // explicit encoding, in line with the other mb_* calls in this class
    return \mb_substr_count($haystack, $needle, 'UTF-8');
  }
5925
5926
  /**
5927
   * Replace text within a portion of a string.
5928
   *
5929 20
   * source: https://gist.github.com/stemar/8287074
5930
   *
5931 20
   * @param string|array   $str
5932 2
   * @param string|array   $replacement
5933
   * @param int|array      $start
5934 2
   * @param null|int|array $length
5935 2
   *
5936
   * @return array|string
5937 2
   */
5938
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    // Array input: bring $replacement, $start and $length to one entry per subject
    // string and run this method once per string. Scalar input: replace on character
    // level (see below).

    if (is_array($str)) {
      $num = count($str);

      // $replacement: a scalar is repeated for every string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: entries that are not integers are replaced by 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: null entries become 0, other non-integer entries become $num
      // NOTE(review): a missing $length is filled with 0 here (pure insertion), while
      // the scalar path below treats a missing length as "to the end" - confirm that
      // this difference is intended
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // scalar subject with an array of replacements: only the first one is used
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    $str = (string)$str;

    // split subject and replacement into single characters ("s": "." also matches "\n")
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    // no length: replace everything from $start to the end of the string
    if ($length === null) {
      self::checkForSupport();

      $length = \mb_strlen($str, 'UTF-8');
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // separator first: the reversed argument order is deprecated since PHP 7.4
    return implode('', $smatches[0]);
  }
6003 2
6004 2
  /**
6005 2
   * Returns a case swapped version of the string.
6006
   *
6007
   * @param string $str
6008 2
   * @param string $encoding
6009 18
   *
6010
   * @return string each character's case swapped
6011 20
   */
6012
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $str = self::clean($str);

    // every non-whitespace character is looked at on its own
    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          // unchanged by uppercasing => hand back the lowercase form,
          // otherwise the uppercase form
          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6038
6039
  /**
6040
   * alias for "UTF8::to_ascii()"
6041
   *
6042
   * @param string $s The input string e.g. a UTF-8 String
6043 2
   * @param string $subst_chr
6044
   *
6045 2
   * @return string
6046
   */
6047 1
  public static function toAscii($s, $subst_chr = '?')
  {
    // camelCase alias: both arguments go unchanged to to_ascii()
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
6051
6052 1
  /**
6053 2
   * alias for "UTF8::to_latin1()"
6054 2
   *
6055
   * @param $str
6056
   *
6057
   * @return string
6058
   */
6059
  public static function toLatin1($str)
  {
    // camelCase alias: the argument goes unchanged to to_latin1()
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
6063
6064
  /**
6065
   * alias for "UTF8::to_utf8"
6066
   *
6067
   * @param string $str
6068
   *
6069
   * @return string
6070
   */
6071
  public static function toUTF8($str)
  {
    // camelCase alias: the argument goes unchanged to to_utf8()
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
6075 26
6076
  /**
6077 26
   * convert to ASCII
6078 5
   *
6079
   * @param string $s The input string e.g. a UTF-8 String
6080
   * @param string $subst_chr
6081
   *
6082 22
   * @return string
6083 6
   */
6084
  public static function to_ascii($s, $subst_chr = '?')
  {
    // Transliterates a string to ASCII. Strings without any byte >= 0x80 are returned
    // right after cleaning; otherwise every multi-byte character is replaced on its own.

    // extra transliteration table, loaded on first use
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // no high byte: nothing to transliterate
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = \Normalizer::normalize($s, \Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // split into single characters; the "s" modifier lets "." match line feeds too -
    // without it they were not captured and therefore missing from the joined result
    preg_match_all('/./us', $s, $matches);
    $chars = $matches[0];

    foreach ($chars as &$c) {

      // single-byte characters are kept untouched
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if ($t !== false && is_string($t)) {
          if (!isset($t[0])) {
            // empty result: mark as "not transliterated"
            $t = '?';
          } elseif (isset($t[1])) {
            // multi-character result: strip leading accent-like ASCII characters
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // first fallback: the extra table, then the first byte of the NFD form
      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = \Normalizer::normalize($c, \Normalizer::NFD);

          if ($t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      // second fallback, only reached when the result is still a literal "?"
      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference so that $c cannot alter the last element later on
    unset($c);

    return implode('', $chars);
  }
6155
6156
  /**
6157
   * alias for "UTF8::to_win1252()"
6158
   *
6159
   * @param   string $str
6160
   *
6161
   * @return  array|string
6162
   */
6163
  public static function to_iso8859($str)
  {
    // alias: the argument goes unchanged to to_win1252()
    $converted = self::to_win1252($str);

    return $converted;
  }
6167
6168
  /**
6169
   * alias for "UTF8::to_win1252()"
6170
   *
6171
   * @param string|array $str
6172 1
   *
6173
   * @return string|array
6174 1
   */
6175
  public static function to_latin1($str)
  {
    // alias: the argument goes unchanged to to_win1252()
    $converted = self::to_win1252($str);

    return $converted;
  }
6179
6180 1
  /**
6181
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
6182 1
   *
6183
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
6184 1
   *
6185 1
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
6186 1
   *
6187 1
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
6188
   *    are followed by any of these:  ("group B")
6189 1
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
6190 1
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
6191 1
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
6192
   * is also a valid unicode character, and will be left unchanged.
6193 1
   *
6194
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
6195
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
6196
   *
6197
   * @param string|array $str Any string or array.
6198
   *
6199
   * @return string The same string, but UTF8 encoded.
6200
   */
6201
  public static function to_utf8($str)
  {
    // Walks the input byte by byte: byte sequences that already look like UTF-8 are
    // copied, every other high byte is re-encoded as a two-byte UTF-8 sequence (with a
    // lookup table for the Windows-1252 specials). Arrays are converted element by element.

    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // look ahead up to three bytes; "\x00" stands in for "past the end"
        $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
        $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
        $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

        // continuation bytes are in the range 0x80 - 0xBF
        $isCont2 = $c2 >= "\x80" && $c2 <= "\xbf";
        $isCont3 = $c3 >= "\x80" && $c3 <= "\xbf";
        $isCont4 = $c4 >= "\x80" && $c4 <= "\xbf";

        // length of the UTF-8 looking sequence that starts at $c1, 0 if there is none
        if ($c1 <= "\xdf") { // looks like 2 bytes UTF8
          $seqLength = $isCont2 ? 2 : 0;
        } elseif ($c1 <= "\xef") { // looks like 3 bytes UTF8
          $seqLength = ($isCont2 && $isCont3) ? 3 : 0;
        } elseif ($c1 <= "\xf7") { // looks like 4 bytes UTF8
          $seqLength = ($isCont2 && $isCont3 && $isCont4) ? 4 : 0;
        } else { // doesn't look like UTF8, but should be converted
          $seqLength = 0;
        }

        if ($seqLength > 0) { // yeah, almost sure it's UTF8 already
          $buf .= substr($str, $i, $seqLength);
          $i += $seqLength - 1;
        } else { // not valid UTF8 - convert it
          // ">> 6" is the integer form of the former "/ 64", which handed a
          // fractional float to chr() (deprecated since PHP 8.1)
          $buf .= (chr(ord($c1) >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= (chr($ordC1 >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6307
6308
  /**
6309
   * Convert a string into "win1252"-encoding.
6310
   *
6311
   * @param  string|array $str
6312
   *
6313
   * @return string|array
6314
   */
6315
  protected static function to_win1252($str)
  {
    // Arrays are converted element by element (recursively); keys are kept.
    if (is_array($str)) {
      $converted = array();

      foreach ($str as $key => $value) {
        $converted[$key] = self::to_win1252($value);
      }

      return $converted;
    }

    $input = (string)$str;

    // Empty input short-circuits; everything else is delegated to utf8_decode().
    return isset($input[0]) ? self::utf8_decode($input) : '';
  }
6335
6336
  /**
6337
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
6338
   *
6339
   * INFO: This is slower than "trim()"
6340
   *
6341
   * But we can only use the original-function, if we use <= 7-Bit in the string / chars
6342
   * but the check for ASCII (7-Bit) costs more time than we would save here.
6343
   *
6344
   * @param    string $str   The string to be trimmed
6345
   * @param    string $chars Optional characters to be stripped
6346
   *
6347
   * @return   string The trimmed string
6348
   */
6349
  public static function trim($str = '', $chars = INF)
  {
    $subject = (string)$str;

    // An empty string has nothing to strip.
    if ($subject === '') {
      return '';
    }

    // A caller-supplied, truthy character list is handled by the one-sided helpers.
    $hasCustomChars = ($chars !== INF && $chars);

    if ($hasCustomChars) {
      $leftTrimmed = self::ltrim($subject, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // Default: strip Unicode separator (\pZ) and "other"/control (\pC) characters from both ends.
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $subject);
  }
6364
6365
  /**
6366
   * Makes string's first char uppercase.
6367
   *
6368
   * @param    string $str The input string
6369
   *
6370
   * @return   string The resulting string
6371
   */
6372
  public static function ucfirst($str)
  {
    // Upper-case only the leading character; the remainder is appended untouched.
    $head = self::substr($str, 0, 1);
    $upperHead = self::strtoupper($head);
    $tail = self::substr($str, 1);

    return $upperHead . $tail;
  }
6376
6377
  /**
6378
   * alias for "UTF8::ucfirst"
6379
   *
6380
   * @param $str
6381
   *
6382
   * @return string
6383
   */
6384
  public static function ucword($str)
  {
    // Pure alias: delegate to ucfirst() and hand its result back unchanged.
    $result = self::ucfirst($str);

    return $result;
  }
6388
6389
  /**
6390
   * Uppercase for all words in the string.
6391
   *
6392
   * @param  string $str
6393
   * @param array   $exceptions
6394
   *
6395
   * @return string
6396
   */
6397
  public static function ucwords($str, $exceptions = array())
  {
    // Cast first and test emptiness by length, so that a string such as "0"
    // (which PHP treats as falsy) is still processed instead of being dropped.
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Only consult the exception list when it actually contains entries.
    $useExceptions = count($exceptions) > 0;

    $newwords = array();

    // Words are split on single spaces only; other whitespace stays inside a "word".
    foreach (explode(' ', $str) as $word) {
      // Words listed in $exceptions (strict comparison) are left as they are.
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    // The joined result goes through ucfirst() once more, so the very first
    // character is upper-cased even when the first word is an exception.
    return self::ucfirst(implode(' ', $newwords));
  }
6430
6431
  /**
6432
   * Multi decode html entity & fix urlencoded-win1252-chars.
6433
   *
6434
   * e.g:
6435
   * 'D&#252;sseldorf'               => 'Düsseldorf'
6436
   * 'D%FCsseldorf'                  => 'Düsseldorf'
6437
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
6438
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
6439
   * 'Düsseldorf'                   => 'Düsseldorf'
6440 6
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
6441
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
6442 6
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
6443 6
   *
6444
   * @param string $str
6445 6
   *
6446
   * @return string
6447 6
   */
6448 5
  public static function urldecode($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // Step 1: plain URL-decoding, then turn "%uXXXX" escapes into hex HTML entities.
    $urlDecoded = urldecode($input);
    $withEntities = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $urlDecoded);

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4.
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // Step 2: to_utf8 -> html_entity_decode -> rawurldecode -> fix_simple_utf8, in that order.
    $asUtf8 = self::to_utf8($withEntities);
    $entityDecoded = self::html_entity_decode($asUtf8, $flags);
    $rawDecoded = rawurldecode($entityDecoded);
    $fixed = self::fix_simple_utf8($rawDecoded);

    return (string)$fixed;
  }
6471 6
6472
  /**
6473 6
   * Return an array with "urlencoded"-win1252 -> UTF-8
6474
   *
6475 6
   * @return mixed
6476 6
   */
6477
  public static function urldecode_fix_win1252_chars()
  {
    // Lookup table: percent-encoded Windows-1252 byte ("%20" .. "%FF") => UTF-8 character.
    // Declared static so the array is only built once.
    //
    // FIX: "%80" used to map to a backtick; byte 0x80 in Windows-1252 is the EURO SIGN.
    //
    // NOTE(review): the entries for %7F, %81, %8D, %8F, %90, %9D, %A0 and %AD are empty
    // strings in this listing - confirm whether that is intended or whether
    // non-printable characters (e.g. NBSP, soft hyphen) were lost.
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6708
6709
  /**
6710
   * Decodes an UTF-8 string to ISO-8859-1.
6711
   *
6712
   * @param string $str
6713
   *
6714
   * @return string
6715
   */
6716
  public static function utf8_decode($str)
  {
    // Search/replace columns of self::$utf8ToWin1252, built on first use and then reused.
    static $searchList = null;
    static $replaceList = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    self::checkForSupport();

    // Normalise the input to UTF-8 before mapping it down.
    $utf8 = self::to_utf8($input);

    if (null === $searchList) {
      $searchList = array_keys(self::$utf8ToWin1252);
      $replaceList = array_values(self::$utf8ToWin1252);
    }

    // Apply the table replacements first, then hand the result to the Xml polyfill decoder.
    $mapped = str_replace($searchList, $replaceList, $utf8);

    return Xml::utf8_decode($mapped);
  }
6739
6740
  /**
6741
   * Encodes an ISO-8859-1 string to UTF-8.
6742
   *
6743
   * @param string $str
6744
   *
6745
   * @return string
6746
   */
6747
  public static function utf8_encode($str)
  {
    // Search/replace columns of self::$cp1252ToUtf8, built on first use and then reused.
    static $searchList = null;
    static $replaceList = null;

    $encoded = \utf8_encode($str);

    // Without a "\xC2" byte in the result there is nothing for the table to replace.
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if (null === $searchList) {
      $searchList = array_keys(self::$cp1252ToUtf8);
      $replaceList = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($searchList, $replaceList, $encoded);
  }
6766
6767
  /**
6768
   * fix -> utf8-win1252 chars
6769
   *
6770
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
6771
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
6772
   * See: http://en.wikipedia.org/wiki/Windows-1252
6773
   *
6774
   * @deprecated use "UTF8::fix_simple_utf8()"
6775
   *
6776
   * @param   string $str
6777
   *
6778
   * @return  string
6779
   */
6780
  public static function utf8_fix_win1252_chars($str)
  {
    // Deprecated alias: all work is done by fix_simple_utf8().
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6784
6785
  /**
6786
   * Returns an array with all utf8 whitespace characters.
6787
   *
6788
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
6789
   *
6790
   * @author: Derek E. [email protected]
6791
   *
6792
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
6793
   *         as defined in above URL
6794
   */
6795
  public static function whitespace_table()
  {
    // Expose the class-level whitespace table as-is.
    $table = self::$whitespaceTable;

    return $table;
  }
6799
6800
  /**
6801
   * Limit the number of words in a string.
6802
   *
6803
   * @param  string $str
6804
   * @param  int    $words
6805
   * @param  string $strAddOn
6806
   *
6807
   * @return string
6808
   */
6809
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    $maxWords = (int)$words;

    // A limit below one word yields an empty result.
    if ($maxWords < 1) {
      return '';
    }

    // Match optional leading whitespace followed by up to $maxWords "word + trailing whitespace" groups.
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $maxWords . '}/u';
    preg_match($pattern, $text, $found);

    // Only a match that is shorter than the whole input means something was cut off.
    $wasTruncated = isset($found[0]) && self::strlen($text) !== self::strlen($found[0]);

    if (!$wasTruncated) {
      return $text;
    }

    return self::rtrim($found[0]) . $strAddOn;
  }
6835
6836
  /**
6837
   * Wraps a string to a given number of characters
6838
   *
6839
   * @link  http://php.net/manual/en/function.wordwrap.php
6840
   *
6841
   * @param string $str   <p>
6842
   *                      The input string.
6843
   *                      </p>
6844
   * @param int    $width [optional] <p>
6845
   *                      The column width.
6846
   *                      </p>
6847
   * @param string $break [optional] <p>
6848
   *                      The line is broken using the optional
6849
   *                      break parameter.
6850
   *                      </p>
6851
   * @param bool   $cut   [optional] <p>
6852
   *                      If the cut is set to true, the string is
6853
   *                      always wrapped at or before the specified width. So if you have
6854
   *                      a word that is larger than the given width, it is broken apart.
6855
   *                      (See second example).
6856
   *                      </p>
6857
   *
6858
   * @return string the given string wrapped at the specified column.
6859
   * @since 4.0.2
6860
   * @since 5.0
6861
   */
6862
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $text = (string)$str;
    $delimiter = (string)$break;

    // Both the text and the break string must be non-empty.
    if (!isset($text[0], $delimiter[0])) {
      return '';
    }

    $segments = explode($delimiter, $text);
    $segmentCount = count($segments);

    if ($segmentCount === 1 && $segments[0] === '') {
      return '';
    }

    // $glyphs collects the characters (an existing break counts as one element);
    // $mask mirrors them with one placeholder byte each: '#' for a break,
    // ' ' for a space and '?' for any other character.
    $glyphs = array();
    $mask = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($index = 0; $index < $segmentCount; ++$index) {

      if ($index) {
        $glyphs[] = $delimiter;
        $mask .= '#';
      }

      $segment = $segments[$index];
      unset($segments[$index]);

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;

        if (' ' === $glyph) {
          $mask .= ' ';
        } else {
          $mask .= '?';
        }
      }
    }

    // The native wordwrap() runs on the mask; its '#' markers show where lines end.
    $wrappedMask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $glyphIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (true) {
      $breakPos = self::strpos($wrappedMask, '#', $breakPos + 1);

      if (false === $breakPos) {
        break;
      }

      // Copy every character that precedes this break marker.
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $glyphs[$glyphIndex];
        unset($glyphs[$glyphIndex]);
        $glyphIndex++;
      }

      // The break or space sitting at the wrap position is dropped, not copied.
      if ($delimiter === $glyphs[$glyphIndex] || ' ' === $glyphs[$glyphIndex]) {
        unset($glyphs[$glyphIndex]);
        $glyphIndex++;
      }

      $wrapped .= $delimiter;
    }

    // Whatever is left after the last marker forms the final line.
    return $wrapped . implode('', $glyphs);
  }
6917
6918
  /**
6919
   * Returns an array of Unicode White Space characters.
6920
   *
6921
   * @return   array An array with numeric code point as key and White Space Character as value.
6922
   */
6923
  public static function ws()
  {
    // Expose the class-level white space list as-is.
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6927
6928
}
6929