Completed
Push — master ( 8adf69...a1d5e4 )
by Lars
05:56
created

UTF8::html_encode()   B

Complexity

Conditions 3
Paths 3

Size

Total Lines 24
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 3.0032

Importance

Changes 3
Bugs 0 Features 0
Metric Value
c 3
b 0
f 0
dl 0
loc 24
ccs 13
cts 14
cp 0.9286
rs 8.9713
cc 3
eloc 13
nc 3
nop 2
crap 3.0032
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Numeric code point => UTF-8 Character
83
   *
84
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
85
   *
86
   * @var array
87
   */
88
  protected static $whitespace = array(
89
    // NUL Byte
90
    0     => "\x0",
91
    // Tab
92
    9     => "\x9",
93
    // New Line
94
    10    => "\xa",
95
    // Vertical Tab
96
    11    => "\xb",
97
    // Carriage Return
98
    13    => "\xd",
99
    // Ordinary Space
100
    32    => "\x20",
101
    // NO-BREAK SPACE
102
    160   => "\xc2\xa0",
103
    // OGHAM SPACE MARK
104
    5760  => "\xe1\x9a\x80",
105
    // MONGOLIAN VOWEL SEPARATOR
106
    6158  => "\xe1\xa0\x8e",
107
    // EN QUAD
108
    8192  => "\xe2\x80\x80",
109
    // EM QUAD
110
    8193  => "\xe2\x80\x81",
111
    // EN SPACE
112
    8194  => "\xe2\x80\x82",
113
    // EM SPACE
114
    8195  => "\xe2\x80\x83",
115
    // THREE-PER-EM SPACE
116
    8196  => "\xe2\x80\x84",
117
    // FOUR-PER-EM SPACE
118
    8197  => "\xe2\x80\x85",
119
    // SIX-PER-EM SPACE
120
    8198  => "\xe2\x80\x86",
121
    // FIGURE SPACE
122
    8199  => "\xe2\x80\x87",
123
    // PUNCTUATION SPACE
124
    8200  => "\xe2\x80\x88",
125
    // THIN SPACE
126
    8201  => "\xe2\x80\x89",
127
    // HAIR SPACE
128
    8202  => "\xe2\x80\x8a",
129
    // LINE SEPARATOR
130
    8232  => "\xe2\x80\xa8",
131
    // PARAGRAPH SEPARATOR
132
    8233  => "\xe2\x80\xa9",
133
    // NARROW NO-BREAK SPACE
134
    8239  => "\xe2\x80\xaf",
135
    // MEDIUM MATHEMATICAL SPACE
136
    8287  => "\xe2\x81\x9f",
137
    // IDEOGRAPHIC SPACE
138
    12288 => "\xe3\x80\x80",
139
  );
140
141
  /**
142
   * @var array
143
   */
144
  protected static $whitespaceTable = array(
145
      'SPACE'                     => "\x20",
146
      'NO-BREAK SPACE'            => "\xc2\xa0",
147
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
148
      'EN QUAD'                   => "\xe2\x80\x80",
149
      'EM QUAD'                   => "\xe2\x80\x81",
150
      'EN SPACE'                  => "\xe2\x80\x82",
151
      'EM SPACE'                  => "\xe2\x80\x83",
152
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
153
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
154
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
155
      'FIGURE SPACE'              => "\xe2\x80\x87",
156
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
157
      'THIN SPACE'                => "\xe2\x80\x89",
158
      'HAIR SPACE'                => "\xe2\x80\x8a",
159
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
160
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
161
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
162
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
163
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
164
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
165
  );
166
167
  /**
168
   * bidirectional text chars
169
   *
170
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
171
   *
172
   * @var array
173
   */
174
  protected static $bidiUniCodeControlsTable = array(
175
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
176
    8234 => "\xE2\x80\xAA",
177
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
178
    8235 => "\xE2\x80\xAB",
179
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
180
    8236 => "\xE2\x80\xAC",
181
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
182
    8237 => "\xE2\x80\xAD",
183
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
184
    8238 => "\xE2\x80\xAE",
185
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
186
    8294 => "\xE2\x81\xA6",
187
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
188
    8295 => "\xE2\x81\xA7",
189
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
190
    8296 => "\xE2\x81\xA8",
191
    // POP DIRECTIONAL ISOLATE
192
    8297 => "\xE2\x81\xA9",
193
  );
194
195
  /**
196
   * @var array
197
   */
198
  protected static $commonCaseFold = array(
199
      'ſ'            => 's',
200
      "\xCD\x85"     => 'ι',
201
      'ς'            => 'σ',
202
      "\xCF\x90"     => 'β',
203
      "\xCF\x91"     => 'θ',
204
      "\xCF\x95"     => 'φ',
205
      "\xCF\x96"     => 'π',
206
      "\xCF\xB0"     => 'κ',
207
      "\xCF\xB1"     => 'ρ',
208
      "\xCF\xB5"     => 'ε',
209
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
210
      "\xE1\xBE\xBE" => 'ι',
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $brokenUtf8ToUtf8 = array(
217
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
218
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
219
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
220
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
221
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
222
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
223
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
224
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
225
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
226
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
227
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
228
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
229
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
230
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
231
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
232
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
233
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
234
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
235
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
236
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
237
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
238
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
239
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
240
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
241
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
242
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
243
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
244
      'ü'       => 'ü',
245
      'ä'       => 'ä',
246
      'ö'       => 'ö',
247
      'Ö'       => 'Ö',
248
      'ß'       => 'ß',
249
      'Ã '       => 'à',
250
      'á'       => 'á',
251
      'â'       => 'â',
252
      'ã'       => 'ã',
253
      'ù'       => 'ù',
254
      'ú'       => 'ú',
255
      'û'       => 'û',
256
      'Ù'       => 'Ù',
257
      'Ú'       => 'Ú',
258
      'Û'       => 'Û',
259
      'Ü'       => 'Ü',
260
      'ò'       => 'ò',
261
      'ó'       => 'ó',
262
      'ô'       => 'ô',
263
      'è'       => 'è',
264
      'é'       => 'é',
265
      'ê'       => 'ê',
266
      'ë'       => 'ë',
267
      'À'       => 'À',
268
      'Á'       => 'Á',
269
      'Â'       => 'Â',
270
      'Ã'       => 'Ã',
271
      'Ä'       => 'Ä',
272
      'Ã…'       => 'Å',
273
      'Ç'       => 'Ç',
274
      'È'       => 'È',
275
      'É'       => 'É',
276
      'Ê'       => 'Ê',
277
      'Ë'       => 'Ë',
278
      'ÃŒ'       => 'Ì',
279
      'Í'       => 'Í',
280
      'ÃŽ'       => 'Î',
281
      'Ï'       => 'Ï',
282
      'Ñ'       => 'Ñ',
283
      'Ã’'       => 'Ò',
284
      'Ó'       => 'Ó',
285
      'Ô'       => 'Ô',
286
      'Õ'       => 'Õ',
287
      'Ø'       => 'Ø',
288
      'Ã¥'       => 'å',
289
      'æ'       => 'æ',
290
      'ç'       => 'ç',
291
      'ì'       => 'ì',
292
      'í'       => 'í',
293
      'î'       => 'î',
294
      'ï'       => 'ï',
295
      'ð'       => 'ð',
296
      'ñ'       => 'ñ',
297
      'õ'       => 'õ',
298
      'ø'       => 'ø',
299
      'ý'       => 'ý',
300
      'ÿ'       => 'ÿ',
301
      '€'      => '€',
302
  );
303
304
  /**
305
   * @var array
306
   */
307
  protected static $utf8ToWin1252 = array(
308
      "\xe2\x82\xac" => "\x80", // EURO SIGN
309
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
310
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
311
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
312
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
313
      "\xe2\x80\xa0" => "\x86", // DAGGER
314
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
315
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
316
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
317
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
318
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
319
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
320
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
321
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
322
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
323
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
324
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
325
      "\xe2\x80\xa2" => "\x95", // BULLET
326
      "\xe2\x80\x93" => "\x96", // EN DASH
327
      "\xe2\x80\x94" => "\x97", // EM DASH
328
      "\xcb\x9c"     => "\x98", // SMALL TILDE
329
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
330
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
331
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
332
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
333
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
334
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
335
  );
336
337
  /**
338
   * @var array
339
   */
340
  protected static $utf8MSWord = array(
341
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
342
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
343
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
344
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
345
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
346
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
347
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
348
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
349
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
350
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
351
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
352
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
353
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
354
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
355
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
356
  );
357
358
  protected static $iconvEncoding = array(
359
      'ANSI_X3.4-1968',
360
      'ANSI_X3.4-1986',
361
      'ASCII',
362
      'CP367',
363
      'IBM367',
364
      'ISO-IR-6',
365
      'ISO646-US',
366
      'ISO_646.IRV:1991',
367
      'US',
368
      'US-ASCII',
369
      'CSASCII',
370
      'UTF-8',
371
      'ISO-10646-UCS-2',
372
      'UCS-2',
373
      'CSUNICODE',
374
      'UCS-2BE',
375
      'UNICODE-1-1',
376
      'UNICODEBIG',
377
      'CSUNICODE11',
378
      'UCS-2LE',
379
      'UNICODELITTLE',
380
      'ISO-10646-UCS-4',
381
      'UCS-4',
382
      'CSUCS4',
383
      'UCS-4BE',
384
      'UCS-4LE',
385
      'UTF-16',
386
      'UTF-16BE',
387
      'UTF-16LE',
388
      'UTF-32',
389
      'UTF-32BE',
390
      'UTF-32LE',
391
      'UNICODE-1-1-UTF-7',
392
      'UTF-7',
393
      'CSUNICODE11UTF7',
394
      'UCS-2-INTERNAL',
395
      'UCS-2-SWAPPED',
396
      'UCS-4-INTERNAL',
397
      'UCS-4-SWAPPED',
398
      'C99',
399
      'JAVA',
400
      'CP819',
401
      'IBM819',
402
      'ISO-8859-1',
403
      'ISO-IR-100',
404
      'ISO8859-1',
405
      'ISO_8859-1',
406
      'ISO_8859-1:1987',
407
      'L1',
408
      'LATIN1',
409
      'CSISOLATIN1',
410
      'ISO-8859-2',
411
      'ISO-IR-101',
412
      'ISO8859-2',
413
      'ISO_8859-2',
414
      'ISO_8859-2:1987',
415
      'L2',
416
      'LATIN2',
417
      'CSISOLATIN2',
418
      'ISO-8859-3',
419
      'ISO-IR-109',
420
      'ISO8859-3',
421
      'ISO_8859-3',
422
      'ISO_8859-3:1988',
423
      'L3',
424
      'LATIN3',
425
      'CSISOLATIN3',
426
      'ISO-8859-4',
427
      'ISO-IR-110',
428
      'ISO8859-4',
429
      'ISO_8859-4',
430
      'ISO_8859-4:1988',
431
      'L4',
432
      'LATIN4',
433
      'CSISOLATIN4',
434
      'CYRILLIC',
435
      'ISO-8859-5',
436
      'ISO-IR-144',
437
      'ISO8859-5',
438
      'ISO_8859-5',
439
      'ISO_8859-5:1988',
440
      'CSISOLATINCYRILLIC',
441
      'ARABIC',
442
      'ASMO-708',
443
      'ECMA-114',
444
      'ISO-8859-6',
445
      'ISO-IR-127',
446
      'ISO8859-6',
447
      'ISO_8859-6',
448
      'ISO_8859-6:1987',
449
      'CSISOLATINARABIC',
450
      'ECMA-118',
451
      'ELOT_928',
452
      'GREEK',
453
      'GREEK8',
454
      'ISO-8859-7',
455
      'ISO-IR-126',
456
      'ISO8859-7',
457
      'ISO_8859-7',
458
      'ISO_8859-7:1987',
459
      'ISO_8859-7:2003',
460
      'CSISOLATINGREEK',
461
      'HEBREW',
462
      'ISO-8859-8',
463
      'ISO-IR-138',
464
      'ISO8859-8',
465
      'ISO_8859-8',
466
      'ISO_8859-8:1988',
467
      'CSISOLATINHEBREW',
468
      'ISO-8859-9',
469
      'ISO-IR-148',
470
      'ISO8859-9',
471
      'ISO_8859-9',
472
      'ISO_8859-9:1989',
473
      'L5',
474
      'LATIN5',
475
      'CSISOLATIN5',
476
      'ISO-8859-10',
477
      'ISO-IR-157',
478
      'ISO8859-10',
479
      'ISO_8859-10',
480
      'ISO_8859-10:1992',
481
      'L6',
482
      'LATIN6',
483
      'CSISOLATIN6',
484
      'ISO-8859-11',
485
      'ISO8859-11',
486
      'ISO_8859-11',
487
      'ISO-8859-13',
488
      'ISO-IR-179',
489
      'ISO8859-13',
490
      'ISO_8859-13',
491
      'L7',
492
      'LATIN7',
493
      'ISO-8859-14',
494
      'ISO-CELTIC',
495
      'ISO-IR-199',
496
      'ISO8859-14',
497
      'ISO_8859-14',
498
      'ISO_8859-14:1998',
499
      'L8',
500
      'LATIN8',
501
      'ISO-8859-15',
502
      'ISO-IR-203',
503
      'ISO8859-15',
504
      'ISO_8859-15',
505
      'ISO_8859-15:1998',
506
      'LATIN-9',
507
      'ISO-8859-16',
508
      'ISO-IR-226',
509
      'ISO8859-16',
510
      'ISO_8859-16',
511
      'ISO_8859-16:2001',
512
      'L10',
513
      'LATIN10',
514
      'KOI8-R',
515
      'CSKOI8R',
516
      'KOI8-U',
517
      'KOI8-RU',
518
      'CP1250',
519
      'MS-EE',
520
      'WINDOWS-1250',
521
      'CP1251',
522
      'MS-CYRL',
523
      'WINDOWS-1251',
524
      'CP1252',
525
      'MS-ANSI',
526
      'WINDOWS-1252',
527
      'CP1253',
528
      'MS-GREEK',
529
      'WINDOWS-1253',
530
      'CP1254',
531
      'MS-TURK',
532
      'WINDOWS-1254',
533
      'CP1255',
534
      'MS-HEBR',
535
      'WINDOWS-1255',
536
      'CP1256',
537
      'MS-ARAB',
538
      'WINDOWS-1256',
539
      'CP1257',
540
      'WINBALTRIM',
541
      'WINDOWS-1257',
542
      'CP1258',
543
      'WINDOWS-1258',
544
      '850',
545
      'CP850',
546
      'IBM850',
547
      'CSPC850MULTILINGUAL',
548
      '862',
549
      'CP862',
550
      'IBM862',
551
      'CSPC862LATINHEBREW',
552
      '866',
553
      'CP866',
554
      'IBM866',
555
      'CSIBM866',
556
      'MAC',
557
      'MACINTOSH',
558
      'MACROMAN',
559
      'CSMACINTOSH',
560
      'MACCENTRALEUROPE',
561
      'MACICELAND',
562
      'MACCROATIAN',
563
      'MACROMANIA',
564
      'MACCYRILLIC',
565
      'MACUKRAINE',
566
      'MACGREEK',
567
      'MACTURKISH',
568
      'MACHEBREW',
569
      'MACARABIC',
570
      'MACTHAI',
571
      'HP-ROMAN8',
572
      'R8',
573
      'ROMAN8',
574
      'CSHPROMAN8',
575
      'NEXTSTEP',
576
      'ARMSCII-8',
577
      'GEORGIAN-ACADEMY',
578
      'GEORGIAN-PS',
579
      'KOI8-T',
580
      'CP154',
581
      'CYRILLIC-ASIAN',
582
      'PT154',
583
      'PTCP154',
584
      'CSPTCP154',
585
      'KZ-1048',
586
      'RK1048',
587
      'STRK1048-2002',
588
      'CSKZ1048',
589
      'MULELAO-1',
590
      'CP1133',
591
      'IBM-CP1133',
592
      'ISO-IR-166',
593
      'TIS-620',
594
      'TIS620',
595
      'TIS620-0',
596
      'TIS620.2529-1',
597
      'TIS620.2533-0',
598
      'TIS620.2533-1',
599
      'CP874',
600
      'WINDOWS-874',
601
      'VISCII',
602
      'VISCII1.1-1',
603
      'CSVISCII',
604
      'TCVN',
605
      'TCVN-5712',
606
      'TCVN5712-1',
607
      'TCVN5712-1:1993',
608
      'ISO-IR-14',
609
      'ISO646-JP',
610
      'JIS_C6220-1969-RO',
611
      'JP',
612
      'CSISO14JISC6220RO',
613
      'JISX0201-1976',
614
      'JIS_X0201',
615
      'X0201',
616
      'CSHALFWIDTHKATAKANA',
617
      'ISO-IR-87',
618
      'JIS0208',
619
      'JIS_C6226-1983',
620
      'JIS_X0208',
621
      'JIS_X0208-1983',
622
      'JIS_X0208-1990',
623
      'X0208',
624
      'CSISO87JISX0208',
625
      'ISO-IR-159',
626
      'JIS_X0212',
627
      'JIS_X0212-1990',
628
      'JIS_X0212.1990-0',
629
      'X0212',
630
      'CSISO159JISX02121990',
631
      'CN',
632
      'GB_1988-80',
633
      'ISO-IR-57',
634
      'ISO646-CN',
635
      'CSISO57GB1988',
636
      'CHINESE',
637
      'GB_2312-80',
638
      'ISO-IR-58',
639
      'CSISO58GB231280',
640
      'CN-GB-ISOIR165',
641
      'ISO-IR-165',
642
      'ISO-IR-149',
643
      'KOREAN',
644
      'KSC_5601',
645
      'KS_C_5601-1987',
646
      'KS_C_5601-1989',
647
      'CSKSC56011987',
648
      'EUC-JP',
649
      'EUCJP',
650
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
651
      'CSEUCPKDFMTJAPANESE',
652
      'MS_KANJI',
653
      'SHIFT-JIS',
654
      'SHIFT_JIS',
655
      'SJIS',
656
      'CSSHIFTJIS',
657
      'CP932',
658
      'ISO-2022-JP',
659
      'CSISO2022JP',
660
      'ISO-2022-JP-1',
661
      'ISO-2022-JP-2',
662
      'CSISO2022JP2',
663
      'CN-GB',
664
      'EUC-CN',
665
      'EUCCN',
666
      'GB2312',
667
      'CSGB2312',
668
      'GBK',
669
      'CP936',
670
      'MS936',
671
      'WINDOWS-936',
672
      'GB18030',
673
      'ISO-2022-CN',
674
      'CSISO2022CN',
675
      'ISO-2022-CN-EXT',
676
      'HZ',
677
      'HZ-GB-2312',
678
      'EUC-TW',
679
      'EUCTW',
680
      'CSEUCTW',
681
      'BIG-5',
682
      'BIG-FIVE',
683
      'BIG5',
684
      'BIGFIVE',
685
      'CN-BIG5',
686
      'CSBIG5',
687
      'CP950',
688
      'BIG5-HKSCS:1999',
689
      'BIG5-HKSCS:2001',
690
      'BIG5-HKSCS',
691
      'BIG5-HKSCS:2004',
692
      'BIG5HKSCS',
693
      'EUC-KR',
694
      'EUCKR',
695
      'CSEUCKR',
696
      'CP949',
697
      'UHC',
698
      'CP1361',
699
      'JOHAB',
700
      'ISO-2022-KR',
701
      'CSISO2022KR',
702
      'CP856',
703
      'CP922',
704
      'CP943',
705
      'CP1046',
706
      'CP1124',
707
      'CP1129',
708
      'CP1161',
709
      'IBM-1161',
710
      'IBM1161',
711
      'CSIBM1161',
712
      'CP1162',
713
      'IBM-1162',
714
      'IBM1162',
715
      'CSIBM1162',
716
      'CP1163',
717
      'IBM-1163',
718
      'IBM1163',
719
      'CSIBM1163',
720
      'DEC-KANJI',
721
      'DEC-HANYU',
722
      '437',
723
      'CP437',
724
      'IBM437',
725
      'CSPC8CODEPAGE437',
726
      'CP737',
727
      'CP775',
728
      'IBM775',
729
      'CSPC775BALTIC',
730
      '852',
731
      'CP852',
732
      'IBM852',
733
      'CSPCP852',
734
      'CP853',
735
      '855',
736
      'CP855',
737
      'IBM855',
738
      'CSIBM855',
739
      '857',
740
      'CP857',
741
      'IBM857',
742
      'CSIBM857',
743
      'CP858',
744
      '860',
745
      'CP860',
746
      'IBM860',
747
      'CSIBM860',
748
      '861',
749
      'CP-IS',
750
      'CP861',
751
      'IBM861',
752
      'CSIBM861',
753
      '863',
754
      'CP863',
755
      'IBM863',
756
      'CSIBM863',
757
      'CP864',
758
      'IBM864',
759
      'CSIBM864',
760
      '865',
761
      'CP865',
762
      'IBM865',
763
      'CSIBM865',
764
      '869',
765
      'CP-GR',
766
      'CP869',
767
      'IBM869',
768
      'CSIBM869',
769
      'CP1125',
770
      'EUC-JISX0213',
771
      'SHIFT_JISX0213',
772
      'ISO-2022-JP-3',
773
      'BIG5-2003',
774
      'ISO-IR-230',
775
      'TDS565',
776
      'ATARI',
777
      'ATARIST',
778
      'RISCOS-LATIN1',
779
  );
780
781
  /**
782
   * @var array
783
   */
784
  private static $support = array();
785
786
  /**
787
   * __construct()
788
   */
789
  public function __construct()
  {
    // The constructor only delegates to the static checkForSupport() helper;
    // it stores nothing on the instance itself.
    self::checkForSupport();
  }
793 1
794
  /**
795
   * Returns a single UTF-8 character from string.
796
   *
797
   * @param    string $str A UTF-8 string.
798
   * @param    int    $pos The position of character to return.
799
   *
800
   * @return   string Single Multi-Byte character.
801
   */
802
  public static function access($str, $pos)
  {
    // $str[$pos]-like access for multi-byte text: ask this class's own
    // substr() for a slice of length 1 that starts at position $pos.
    return self::substr($str, $pos, 1);
  }
808
809
  /**
810
   * Prepends BOM character to the string and returns the whole string.
811
   *
812
   * INFO: If BOM already existed there, the Input string is returned.
813
   *
814
   * @param    string $str The input string
815
   *
816
   * @return   string The output string that contains BOM
817
   */
818
  public static function add_bom_to_string($str)
  {
    // The BOM returned by bom() is three bytes long, so only the first
    // three bytes of the input need to be handed to is_bom().
    if (!self::is_bom(substr($str, 0, 3))) {
      // No BOM at the start yet: prepend one.
      $str = self::bom() . $str;
    }

    // Either the untouched input (BOM already present) or BOM + input.
    return $str;
  }
826
827
  /**
828
   * Returns the Byte Order Mark Character.
829
   *
830
   * @return   string Byte Order Mark
831
   */
832
  public static function bom()
  {
    // The three bytes EF BB BF, i.e. U+FEFF encoded as UTF-8.
    return "\xEF\xBB\xBF";
  }
836
837
  /**
838
   * @alias of UTF8::chr_map()
839
   *
840
   * @param $callback
841
   * @param $str
842
   *
843
   * @return array
844
   */
845
  public static function callback($callback, $str)
  {
    // Pure alias: both arguments are forwarded unchanged to chr_map() and
    // whatever chr_map() returns is handed back as-is.
    return self::chr_map($callback, $str);
  }
849
850
  /**
851
   * Returns an array of all lower and upper case UTF-8 encoded characters.
852
   *
853
   * @return   array An array with lower case chars as keys and upper chars as values.
854
   */
855
  protected static function case_table()
856
  {
857
    static $case = array(
858
859
      // lower => upper
860
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
861
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
862
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
863
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
864
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
865
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
866
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
867
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
868
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
869
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
870
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
871
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
872
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
873
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
874
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
875
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
876
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
877
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
878
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
879
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
880
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
881
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
882
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
883
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
884
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
885
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
886
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
887
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
888
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
889
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
890
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
891
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
892
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
893
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
894
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
895
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
896
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
897
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
898
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
899
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
900
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
901
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
902
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
903
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
904
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
905
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
906
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
907
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
908
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
909
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
910
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
911
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
912
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
913
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
914
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
915
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
916
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
917
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
918
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
919
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
920
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
921
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
922
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
923
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
924
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
925
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
926
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
927
      "\xea\x9e\x87"     => "\xea\x9e\x86",
928
      "\xea\x9e\x85"     => "\xea\x9e\x84",
929
      "\xea\x9e\x83"     => "\xea\x9e\x82",
930
      "\xea\x9e\x81"     => "\xea\x9e\x80",
931
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
932
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
933
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
934
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
935
      "\xea\x9d\xad"     => "\xea\x9d\xac",
936
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
937
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
938
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
939
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
940
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
941
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
942
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
943
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
944
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
945
      "\xea\x9d\x99"     => "\xea\x9d\x98",
946
      "\xea\x9d\x97"     => "\xea\x9d\x96",
947
      "\xea\x9d\x95"     => "\xea\x9d\x94",
948
      "\xea\x9d\x93"     => "\xea\x9d\x92",
949
      "\xea\x9d\x91"     => "\xea\x9d\x90",
950
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
951
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
952
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
953
      "\xea\x9d\x89"     => "\xea\x9d\x88",
954
      "\xea\x9d\x87"     => "\xea\x9d\x86",
955
      "\xea\x9d\x85"     => "\xea\x9d\x84",
956
      "\xea\x9d\x83"     => "\xea\x9d\x82",
957
      "\xea\x9d\x81"     => "\xea\x9d\x80",
958
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
959
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
960
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
961
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
962
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
963
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
964
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
965
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
966
      "\xea\x9c\xad"     => "\xea\x9c\xac",
967
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
968
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
969
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
970
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
971
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
972
      "\xea\x9a\x97"     => "\xea\x9a\x96",
973
      "\xea\x9a\x95"     => "\xea\x9a\x94",
974
      "\xea\x9a\x93"     => "\xea\x9a\x92",
975
      "\xea\x9a\x91"     => "\xea\x9a\x90",
976
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
977
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
978
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
979
      "\xea\x9a\x89"     => "\xea\x9a\x88",
980
      "\xea\x9a\x87"     => "\xea\x9a\x86",
981
      "\xea\x9a\x85"     => "\xea\x9a\x84",
982
      "\xea\x9a\x83"     => "\xea\x9a\x82",
983
      "\xea\x9a\x81"     => "\xea\x9a\x80",
984
      "\xea\x99\xad"     => "\xea\x99\xac",
985
      "\xea\x99\xab"     => "\xea\x99\xaa",
986
      "\xea\x99\xa9"     => "\xea\x99\xa8",
987
      "\xea\x99\xa7"     => "\xea\x99\xa6",
988
      "\xea\x99\xa5"     => "\xea\x99\xa4",
989
      "\xea\x99\xa3"     => "\xea\x99\xa2",
990
      "\xea\x99\x9f"     => "\xea\x99\x9e",
991
      "\xea\x99\x9d"     => "\xea\x99\x9c",
992
      "\xea\x99\x9b"     => "\xea\x99\x9a",
993
      "\xea\x99\x99"     => "\xea\x99\x98",
994
      "\xea\x99\x97"     => "\xea\x99\x96",
995
      "\xea\x99\x95"     => "\xea\x99\x94",
996
      "\xea\x99\x93"     => "\xea\x99\x92",
997
      "\xea\x99\x91"     => "\xea\x99\x90",
998
      "\xea\x99\x8f"     => "\xea\x99\x8e",
999
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1000
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1001
      "\xea\x99\x89"     => "\xea\x99\x88",
1002
      "\xea\x99\x87"     => "\xea\x99\x86",
1003
      "\xea\x99\x85"     => "\xea\x99\x84",
1004
      "\xea\x99\x83"     => "\xea\x99\x82",
1005
      "\xea\x99\x81"     => "\xea\x99\x80",
1006
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1007
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1008
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1009
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1010
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1011
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1012
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1013
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1014
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1015
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1016
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1017
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1018
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1019
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1020
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1021
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1022
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1023
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1024
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1025
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1026
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1027
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1028
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1029
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1030
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1031
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1032
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1033
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1034
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1035
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1036
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1037
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1038
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1039
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1040
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1041
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1042
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1043
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1044
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1045
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1046
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1047
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1048
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1049
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1050
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1051
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1052
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1053
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1054
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1055
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1056
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1057
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1058
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1059
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1060
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1061
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1062
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1063
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1064
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1065
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1066
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1067
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1068
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1069
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1070
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1071
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1072
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1073
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1074
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1075
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1076
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1077
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1078
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1079
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1080
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1081
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1082
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1083
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1084
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1085
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1086
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1087
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1088
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1089
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1090
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1091
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1092
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1093
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1094
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1095
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1096
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1097
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1098
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1099
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1100
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1101
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1102
      "\xe2\xb1\xa5"     => "\xc8\xba",
1103
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1104
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1105
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1106
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1107
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1108
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1109
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1110
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1111
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1112
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1113
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1114
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1115
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1116
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1117
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1118
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1119
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1120
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1121
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1122
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1123
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1124
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1125
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1126
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1127
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1128
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1129
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1130
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1131
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1132
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1133
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1134
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1135
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1136
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1137
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1138
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1139
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1140
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1141
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1142
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1143
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1144
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1145
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1146
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1147
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1148
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1149
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1150
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1151
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1152
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1153
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1154
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1155
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1156
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1157
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1158
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1159
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1160
      "\xe1\xbe\xbe"     => "\xce\x99",
1161
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1162
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1163
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1164
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1165
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1166
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1167
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1168
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1169
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1170
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1171
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1172
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1173
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1174
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1175
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1176
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1177
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1178
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1179
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1180
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1181
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1182
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1183
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1184
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1185
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1186
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1187
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1188
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1189
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1190
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1191
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1192
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1193
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1194
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1195
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1196
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1197
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1198
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1199
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1200
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1201
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1202
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1203
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1204
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1205
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1206
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1207
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1208
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1209
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1210
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1211
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1212
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1213
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1214
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1215
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1216
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1217
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1218
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1219
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1220
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1221
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1222
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1223
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1224
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1225
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1226
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1227
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1228
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1229
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1230
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1231
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1232
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1233
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1234
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1235
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1236
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1237
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1238
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1239
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1240
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1241
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1242
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1243
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1244
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1245
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1246
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1247
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1248
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1249
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1250
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1251
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1252
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1253
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1254
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1255
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1256
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1257
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1258
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1259
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1260
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1261
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1262
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1263
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1264
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1265
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1266
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1267
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1268
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1269
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1270
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1271
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1272
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1273
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1274
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1275
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1276
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1277
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1278
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1279
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1280
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1281
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1282
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1283
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1284
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1285
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1286
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1287
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1288
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1289
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1290
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1291
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1292
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1293
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1294
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1295
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1296
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1297
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1298
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1299
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1300
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1301
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1302
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1303
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1304
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1305
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1306
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1307
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1308
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1309
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1310
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1311
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1312
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1313
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1314
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1315
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1316
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1317
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1318
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1319
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1320
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1321
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1322
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1323
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1324
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1325
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1326
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1327
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1328
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1329
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1330
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1331
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1332
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1333
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1334
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1335
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1336
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1337
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1338
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1339
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1340
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1341
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1342
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1343
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1344
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1345
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1346
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1347
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1348
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1349
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1350
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1351
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1352
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1353
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1354
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1355
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1356
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1357
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1358
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1359
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1360
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1361
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1362
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1363
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1364
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1365
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1366
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1367
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1368
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1369
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1370
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1371
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1372
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1373
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1374
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1375
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1376
      "\xd6\x86"         => "\xd5\x96",
1377
      "\xd6\x85"         => "\xd5\x95",
1378
      "\xd6\x84"         => "\xd5\x94",
1379
      "\xd6\x83"         => "\xd5\x93",
1380
      "\xd6\x82"         => "\xd5\x92",
1381
      "\xd6\x81"         => "\xd5\x91",
1382
      "\xd6\x80"         => "\xd5\x90",
1383
      "\xd5\xbf"         => "\xd5\x8f",
1384
      "\xd5\xbe"         => "\xd5\x8e",
1385
      "\xd5\xbd"         => "\xd5\x8d",
1386
      "\xd5\xbc"         => "\xd5\x8c",
1387
      "\xd5\xbb"         => "\xd5\x8b",
1388
      "\xd5\xba"         => "\xd5\x8a",
1389
      "\xd5\xb9"         => "\xd5\x89",
1390
      "\xd5\xb8"         => "\xd5\x88",
1391
      "\xd5\xb7"         => "\xd5\x87",
1392
      "\xd5\xb6"         => "\xd5\x86",
1393
      "\xd5\xb5"         => "\xd5\x85",
1394
      "\xd5\xb4"         => "\xd5\x84",
1395
      "\xd5\xb3"         => "\xd5\x83",
1396
      "\xd5\xb2"         => "\xd5\x82",
1397
      "\xd5\xb1"         => "\xd5\x81",
1398
      "\xd5\xb0"         => "\xd5\x80",
1399
      "\xd5\xaf"         => "\xd4\xbf",
1400
      "\xd5\xae"         => "\xd4\xbe",
1401
      "\xd5\xad"         => "\xd4\xbd",
1402
      "\xd5\xac"         => "\xd4\xbc",
1403
      "\xd5\xab"         => "\xd4\xbb",
1404
      "\xd5\xaa"         => "\xd4\xba",
1405
      "\xd5\xa9"         => "\xd4\xb9",
1406
      "\xd5\xa8"         => "\xd4\xb8",
1407
      "\xd5\xa7"         => "\xd4\xb7",
1408
      "\xd5\xa6"         => "\xd4\xb6",
1409
      "\xd5\xa5"         => "\xd4\xb5",
1410
      "\xd5\xa4"         => "\xd4\xb4",
1411
      "\xd5\xa3"         => "\xd4\xb3",
1412
      "\xd5\xa2"         => "\xd4\xb2",
1413
      "\xd5\xa1"         => "\xd4\xb1",
1414
      "\xd4\xa5"         => "\xd4\xa4",
1415
      "\xd4\xa3"         => "\xd4\xa2",
1416
      "\xd4\xa1"         => "\xd4\xa0",
1417
      "\xd4\x9f"         => "\xd4\x9e",
1418
      "\xd4\x9d"         => "\xd4\x9c",
1419
      "\xd4\x9b"         => "\xd4\x9a",
1420
      "\xd4\x99"         => "\xd4\x98",
1421
      "\xd4\x97"         => "\xd4\x96",
1422
      "\xd4\x95"         => "\xd4\x94",
1423
      "\xd4\x93"         => "\xd4\x92",
1424
      "\xd4\x91"         => "\xd4\x90",
1425
      "\xd4\x8f"         => "\xd4\x8e",
1426
      "\xd4\x8d"         => "\xd4\x8c",
1427
      "\xd4\x8b"         => "\xd4\x8a",
1428
      "\xd4\x89"         => "\xd4\x88",
1429
      "\xd4\x87"         => "\xd4\x86",
1430
      "\xd4\x85"         => "\xd4\x84",
1431
      "\xd4\x83"         => "\xd4\x82",
1432
      "\xd4\x81"         => "\xd4\x80",
1433
      "\xd3\xbf"         => "\xd3\xbe",
1434
      "\xd3\xbd"         => "\xd3\xbc",
1435
      "\xd3\xbb"         => "\xd3\xba",
1436
      "\xd3\xb9"         => "\xd3\xb8",
1437
      "\xd3\xb7"         => "\xd3\xb6",
1438
      "\xd3\xb5"         => "\xd3\xb4",
1439
      "\xd3\xb3"         => "\xd3\xb2",
1440
      "\xd3\xb1"         => "\xd3\xb0",
1441
      "\xd3\xaf"         => "\xd3\xae",
1442
      "\xd3\xad"         => "\xd3\xac",
1443
      "\xd3\xab"         => "\xd3\xaa",
1444
      "\xd3\xa9"         => "\xd3\xa8",
1445
      "\xd3\xa7"         => "\xd3\xa6",
1446
      "\xd3\xa5"         => "\xd3\xa4",
1447
      "\xd3\xa3"         => "\xd3\xa2",
1448
      "\xd3\xa1"         => "\xd3\xa0",
1449
      "\xd3\x9f"         => "\xd3\x9e",
1450
      "\xd3\x9d"         => "\xd3\x9c",
1451
      "\xd3\x9b"         => "\xd3\x9a",
1452
      "\xd3\x99"         => "\xd3\x98",
1453
      "\xd3\x97"         => "\xd3\x96",
1454
      "\xd3\x95"         => "\xd3\x94",
1455
      "\xd3\x93"         => "\xd3\x92",
1456
      "\xd3\x91"         => "\xd3\x90",
1457
      "\xd3\x8f"         => "\xd3\x80",
1458
      "\xd3\x8e"         => "\xd3\x8d",
1459
      "\xd3\x8c"         => "\xd3\x8b",
1460
      "\xd3\x8a"         => "\xd3\x89",
1461
      "\xd3\x88"         => "\xd3\x87",
1462
      "\xd3\x86"         => "\xd3\x85",
1463
      "\xd3\x84"         => "\xd3\x83",
1464
      "\xd3\x82"         => "\xd3\x81",
1465
      "\xd2\xbf"         => "\xd2\xbe",
1466
      "\xd2\xbd"         => "\xd2\xbc",
1467
      "\xd2\xbb"         => "\xd2\xba",
1468
      "\xd2\xb9"         => "\xd2\xb8",
1469
      "\xd2\xb7"         => "\xd2\xb6",
1470
      "\xd2\xb5"         => "\xd2\xb4",
1471
      "\xd2\xb3"         => "\xd2\xb2",
1472
      "\xd2\xb1"         => "\xd2\xb0",
1473
      "\xd2\xaf"         => "\xd2\xae",
1474
      "\xd2\xad"         => "\xd2\xac",
1475
      "\xd2\xab"         => "\xd2\xaa",
1476
      "\xd2\xa9"         => "\xd2\xa8",
1477
      "\xd2\xa7"         => "\xd2\xa6",
1478
      "\xd2\xa5"         => "\xd2\xa4",
1479
      "\xd2\xa3"         => "\xd2\xa2",
1480
      "\xd2\xa1"         => "\xd2\xa0",
1481
      "\xd2\x9f"         => "\xd2\x9e",
1482
      "\xd2\x9d"         => "\xd2\x9c",
1483
      "\xd2\x9b"         => "\xd2\x9a",
1484
      "\xd2\x99"         => "\xd2\x98",
1485
      "\xd2\x97"         => "\xd2\x96",
1486
      "\xd2\x95"         => "\xd2\x94",
1487
      "\xd2\x93"         => "\xd2\x92",
1488
      "\xd2\x91"         => "\xd2\x90",
1489
      "\xd2\x8f"         => "\xd2\x8e",
1490
      "\xd2\x8d"         => "\xd2\x8c",
1491
      "\xd2\x8b"         => "\xd2\x8a",
1492
      "\xd2\x81"         => "\xd2\x80",
1493
      "\xd1\xbf"         => "\xd1\xbe",
1494
      "\xd1\xbd"         => "\xd1\xbc",
1495
      "\xd1\xbb"         => "\xd1\xba",
1496
      "\xd1\xb9"         => "\xd1\xb8",
1497
      "\xd1\xb7"         => "\xd1\xb6",
1498
      "\xd1\xb5"         => "\xd1\xb4",
1499
      "\xd1\xb3"         => "\xd1\xb2",
1500
      "\xd1\xb1"         => "\xd1\xb0",
1501
      "\xd1\xaf"         => "\xd1\xae",
1502
      "\xd1\xad"         => "\xd1\xac",
1503
      "\xd1\xab"         => "\xd1\xaa",
1504
      "\xd1\xa9"         => "\xd1\xa8",
1505
      "\xd1\xa7"         => "\xd1\xa6",
1506
      "\xd1\xa5"         => "\xd1\xa4",
1507
      "\xd1\xa3"         => "\xd1\xa2",
1508
      "\xd1\xa1"         => "\xd1\xa0",
1509
      "\xd1\x9f"         => "\xd0\x8f",
1510
      "\xd1\x9e"         => "\xd0\x8e",
1511
      "\xd1\x9d"         => "\xd0\x8d",
1512
      "\xd1\x9c"         => "\xd0\x8c",
1513
      "\xd1\x9b"         => "\xd0\x8b",
1514
      "\xd1\x9a"         => "\xd0\x8a",
1515
      "\xd1\x99"         => "\xd0\x89",
1516
      "\xd1\x98"         => "\xd0\x88",
1517
      "\xd1\x97"         => "\xd0\x87",
1518
      "\xd1\x96"         => "\xd0\x86",
1519
      "\xd1\x95"         => "\xd0\x85",
1520
      "\xd1\x94"         => "\xd0\x84",
1521
      "\xd1\x93"         => "\xd0\x83",
1522
      "\xd1\x92"         => "\xd0\x82",
1523
      "\xd1\x91"         => "\xd0\x81",
1524
      "\xd1\x90"         => "\xd0\x80",
1525
      "\xd1\x8f"         => "\xd0\xaf",
1526
      "\xd1\x8e"         => "\xd0\xae",
1527
      "\xd1\x8d"         => "\xd0\xad",
1528
      "\xd1\x8c"         => "\xd0\xac",
1529
      "\xd1\x8b"         => "\xd0\xab",
1530
      "\xd1\x8a"         => "\xd0\xaa",
1531
      "\xd1\x89"         => "\xd0\xa9",
1532
      "\xd1\x88"         => "\xd0\xa8",
1533
      "\xd1\x87"         => "\xd0\xa7",
1534
      "\xd1\x86"         => "\xd0\xa6",
1535
      "\xd1\x85"         => "\xd0\xa5",
1536
      "\xd1\x84"         => "\xd0\xa4",
1537
      "\xd1\x83"         => "\xd0\xa3",
1538
      "\xd1\x82"         => "\xd0\xa2",
1539
      "\xd1\x81"         => "\xd0\xa1",
1540
      "\xd1\x80"         => "\xd0\xa0",
1541
      "\xd0\xbf"         => "\xd0\x9f",
1542
      "\xd0\xbe"         => "\xd0\x9e",
1543
      "\xd0\xbd"         => "\xd0\x9d",
1544
      "\xd0\xbc"         => "\xd0\x9c",
1545
      "\xd0\xbb"         => "\xd0\x9b",
1546
      "\xd0\xba"         => "\xd0\x9a",
1547
      "\xd0\xb9"         => "\xd0\x99",
1548
      "\xd0\xb8"         => "\xd0\x98",
1549
      "\xd0\xb7"         => "\xd0\x97",
1550
      "\xd0\xb6"         => "\xd0\x96",
1551
      "\xd0\xb5"         => "\xd0\x95",
1552
      "\xd0\xb4"         => "\xd0\x94",
1553
      "\xd0\xb3"         => "\xd0\x93",
1554
      "\xd0\xb2"         => "\xd0\x92",
1555
      "\xd0\xb1"         => "\xd0\x91",
1556
      "\xd0\xb0"         => "\xd0\x90",
1557
      "\xcf\xbb"         => "\xcf\xba",
1558
      "\xcf\xb8"         => "\xcf\xb7",
1559
      "\xcf\xb5"         => "\xce\x95",
1560
      "\xcf\xb2"         => "\xcf\xb9",
1561
      "\xcf\xb1"         => "\xce\xa1",
1562
      "\xcf\xb0"         => "\xce\x9a",
1563
      "\xcf\xaf"         => "\xcf\xae",
1564
      "\xcf\xad"         => "\xcf\xac",
1565
      "\xcf\xab"         => "\xcf\xaa",
1566
      "\xcf\xa9"         => "\xcf\xa8",
1567
      "\xcf\xa7"         => "\xcf\xa6",
1568
      "\xcf\xa5"         => "\xcf\xa4",
1569
      "\xcf\xa3"         => "\xcf\xa2",
1570
      "\xcf\xa1"         => "\xcf\xa0",
1571
      "\xcf\x9f"         => "\xcf\x9e",
1572
      "\xcf\x9d"         => "\xcf\x9c",
1573
      "\xcf\x9b"         => "\xcf\x9a",
1574
      "\xcf\x99"         => "\xcf\x98",
1575
      "\xcf\x97"         => "\xcf\x8f",
1576
      "\xcf\x96"         => "\xce\xa0",
1577
      "\xcf\x95"         => "\xce\xa6",
1578
      "\xcf\x91"         => "\xce\x98",
1579
      "\xcf\x90"         => "\xce\x92",
1580
      "\xcf\x8e"         => "\xce\x8f",
1581
      "\xcf\x8d"         => "\xce\x8e",
1582
      "\xcf\x8c"         => "\xce\x8c",
1583
      "\xcf\x8b"         => "\xce\xab",
1584
      "\xcf\x8a"         => "\xce\xaa",
1585
      "\xcf\x89"         => "\xce\xa9",
1586
      "\xcf\x88"         => "\xce\xa8",
1587
      "\xcf\x87"         => "\xce\xa7",
1588
      "\xcf\x86"         => "\xce\xa6",
1589
      "\xcf\x85"         => "\xce\xa5",
1590
      "\xcf\x84"         => "\xce\xa4",
1591
      "\xcf\x83"         => "\xce\xa3",
1592
      "\xcf\x82"         => "\xce\xa3",
1593
      "\xcf\x81"         => "\xce\xa1",
1594
      "\xcf\x80"         => "\xce\xa0",
1595
      "\xce\xbf"         => "\xce\x9f",
1596
      "\xce\xbe"         => "\xce\x9e",
1597
      "\xce\xbd"         => "\xce\x9d",
1598
      "\xce\xbc"         => "\xce\x9c",
1599
      "\xce\xbb"         => "\xce\x9b",
1600
      "\xce\xba"         => "\xce\x9a",
1601
      "\xce\xb9"         => "\xce\x99",
1602
      "\xce\xb8"         => "\xce\x98",
1603
      "\xce\xb7"         => "\xce\x97",
1604
      "\xce\xb6"         => "\xce\x96",
1605
      "\xce\xb5"         => "\xce\x95",
1606
      "\xce\xb4"         => "\xce\x94",
1607
      "\xce\xb3"         => "\xce\x93",
1608
      "\xce\xb2"         => "\xce\x92",
1609
      "\xce\xb1"         => "\xce\x91",
1610
      "\xce\xaf"         => "\xce\x8a",
1611
      "\xce\xae"         => "\xce\x89",
1612
      "\xce\xad"         => "\xce\x88",
1613
      "\xce\xac"         => "\xce\x86",
1614
      "\xcd\xbd"         => "\xcf\xbf",
1615
      "\xcd\xbc"         => "\xcf\xbe",
1616
      "\xcd\xbb"         => "\xcf\xbd",
1617
      "\xcd\xb7"         => "\xcd\xb6",
1618
      "\xcd\xb3"         => "\xcd\xb2",
1619
      "\xcd\xb1"         => "\xcd\xb0",
1620
      "\xca\x92"         => "\xc6\xb7",
1621
      "\xca\x8c"         => "\xc9\x85",
1622
      "\xca\x8b"         => "\xc6\xb2",
1623
      "\xca\x8a"         => "\xc6\xb1",
1624
      "\xca\x89"         => "\xc9\x84",
1625
      "\xca\x88"         => "\xc6\xae",
1626
      "\xca\x83"         => "\xc6\xa9",
1627
      "\xca\x80"         => "\xc6\xa6",
1628
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1629
      "\xc9\xb5"         => "\xc6\x9f",
1630
      "\xc9\xb2"         => "\xc6\x9d",
1631
      "\xc9\xb1"         => "\xe2\xb1\xae",
1632
      "\xc9\xaf"         => "\xc6\x9c",
1633
      "\xc9\xab"         => "\xe2\xb1\xa2",
1634
      "\xc9\xa9"         => "\xc6\x96",
1635
      "\xc9\xa8"         => "\xc6\x97",
1636
      "\xc9\xa5"         => "\xea\x9e\x8d",
1637
      "\xc9\xa3"         => "\xc6\x94",
1638
      "\xc9\xa0"         => "\xc6\x93",
1639
      "\xc9\x9b"         => "\xc6\x90",
1640
      "\xc9\x99"         => "\xc6\x8f",
1641
      "\xc9\x97"         => "\xc6\x8a",
1642
      "\xc9\x96"         => "\xc6\x89",
1643
      "\xc9\x94"         => "\xc6\x86",
1644
      "\xc9\x93"         => "\xc6\x81",
1645
      "\xc9\x92"         => "\xe2\xb1\xb0",
1646
      "\xc9\x91"         => "\xe2\xb1\xad",
1647
      "\xc9\x90"         => "\xe2\xb1\xaf",
1648
      "\xc9\x8f"         => "\xc9\x8e",
1649
      "\xc9\x8d"         => "\xc9\x8c",
1650
      "\xc9\x8b"         => "\xc9\x8a",
1651
      "\xc9\x89"         => "\xc9\x88",
1652
      "\xc9\x87"         => "\xc9\x86",
1653
      "\xc9\x82"         => "\xc9\x81",
1654
      "\xc9\x80"         => "\xe2\xb1\xbf",
1655
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1656
      "\xc8\xbc"         => "\xc8\xbb",
1657
      "\xc8\xb3"         => "\xc8\xb2",
1658
      "\xc8\xb1"         => "\xc8\xb0",
1659
      "\xc8\xaf"         => "\xc8\xae",
1660
      "\xc8\xad"         => "\xc8\xac",
1661
      "\xc8\xab"         => "\xc8\xaa",
1662
      "\xc8\xa9"         => "\xc8\xa8",
1663
      "\xc8\xa7"         => "\xc8\xa6",
1664
      "\xc8\xa5"         => "\xc8\xa4",
1665
      "\xc8\xa3"         => "\xc8\xa2",
1666
      "\xc8\x9f"         => "\xc8\x9e",
1667
      "\xc8\x9d"         => "\xc8\x9c",
1668
      "\xc8\x9b"         => "\xc8\x9a",
1669
      "\xc8\x99"         => "\xc8\x98",
1670
      "\xc8\x97"         => "\xc8\x96",
1671
      "\xc8\x95"         => "\xc8\x94",
1672
      "\xc8\x93"         => "\xc8\x92",
1673
      "\xc8\x91"         => "\xc8\x90",
1674
      "\xc8\x8f"         => "\xc8\x8e",
1675
      "\xc8\x8d"         => "\xc8\x8c",
1676
      "\xc8\x8b"         => "\xc8\x8a",
1677
      "\xc8\x89"         => "\xc8\x88",
1678
      "\xc8\x87"         => "\xc8\x86",
1679
      "\xc8\x85"         => "\xc8\x84",
1680
      "\xc8\x83"         => "\xc8\x82",
1681
      "\xc8\x81"         => "\xc8\x80",
1682
      "\xc7\xbf"         => "\xc7\xbe",
1683
      "\xc7\xbd"         => "\xc7\xbc",
1684
      "\xc7\xbb"         => "\xc7\xba",
1685
      "\xc7\xb9"         => "\xc7\xb8",
1686
      "\xc7\xb5"         => "\xc7\xb4",
1687
      "\xc7\xb3"         => "\xc7\xb2",
1688
      "\xc7\xaf"         => "\xc7\xae",
1689
      "\xc7\xad"         => "\xc7\xac",
1690
      "\xc7\xab"         => "\xc7\xaa",
1691
      "\xc7\xa9"         => "\xc7\xa8",
1692
      "\xc7\xa7"         => "\xc7\xa6",
1693
      "\xc7\xa5"         => "\xc7\xa4",
1694
      "\xc7\xa3"         => "\xc7\xa2",
1695
      "\xc7\xa1"         => "\xc7\xa0",
1696
      "\xc7\x9f"         => "\xc7\x9e",
1697
      "\xc7\x9d"         => "\xc6\x8e",
1698
      "\xc7\x9c"         => "\xc7\x9b",
1699
      "\xc7\x9a"         => "\xc7\x99",
1700
      "\xc7\x98"         => "\xc7\x97",
1701
      "\xc7\x96"         => "\xc7\x95",
1702
      "\xc7\x94"         => "\xc7\x93",
1703
      "\xc7\x92"         => "\xc7\x91",
1704
      "\xc7\x90"         => "\xc7\x8f",
1705
      "\xc7\x8e"         => "\xc7\x8d",
1706
      "\xc7\x8c"         => "\xc7\x8b",
1707
      "\xc7\x89"         => "\xc7\x88",
1708
      "\xc7\x86"         => "\xc7\x85",
1709
      "\xc6\xbf"         => "\xc7\xb7",
1710
      "\xc6\xbd"         => "\xc6\xbc",
1711
      "\xc6\xb9"         => "\xc6\xb8",
1712
      "\xc6\xb6"         => "\xc6\xb5",
1713
      "\xc6\xb4"         => "\xc6\xb3",
1714
      "\xc6\xb0"         => "\xc6\xaf",
1715
      "\xc6\xad"         => "\xc6\xac",
1716
      "\xc6\xa8"         => "\xc6\xa7",
1717
      "\xc6\xa5"         => "\xc6\xa4",
1718
      "\xc6\xa3"         => "\xc6\xa2",
1719
      "\xc6\xa1"         => "\xc6\xa0",
1720
      "\xc6\x9e"         => "\xc8\xa0",
1721
      "\xc6\x9a"         => "\xc8\xbd",
1722
      "\xc6\x99"         => "\xc6\x98",
1723
      "\xc6\x95"         => "\xc7\xb6",
1724
      "\xc6\x92"         => "\xc6\x91",
1725
      "\xc6\x8c"         => "\xc6\x8b",
1726
      "\xc6\x88"         => "\xc6\x87",
1727
      "\xc6\x85"         => "\xc6\x84",
1728
      "\xc6\x83"         => "\xc6\x82",
1729
      "\xc6\x80"         => "\xc9\x83",
1730
      "\xc5\xbf"         => "\x53",
1731
      "\xc5\xbe"         => "\xc5\xbd",
1732
      "\xc5\xbc"         => "\xc5\xbb",
1733
      "\xc5\xba"         => "\xc5\xb9",
1734
      "\xc5\xb7"         => "\xc5\xb6",
1735
      "\xc5\xb5"         => "\xc5\xb4",
1736
      "\xc5\xb3"         => "\xc5\xb2",
1737
      "\xc5\xb1"         => "\xc5\xb0",
1738
      "\xc5\xaf"         => "\xc5\xae",
1739
      "\xc5\xad"         => "\xc5\xac",
1740
      "\xc5\xab"         => "\xc5\xaa",
1741
      "\xc5\xa9"         => "\xc5\xa8",
1742
      "\xc5\xa7"         => "\xc5\xa6",
1743
      "\xc5\xa5"         => "\xc5\xa4",
1744
      "\xc5\xa3"         => "\xc5\xa2",
1745
      "\xc5\xa1"         => "\xc5\xa0",
1746
      "\xc5\x9f"         => "\xc5\x9e",
1747
      "\xc5\x9d"         => "\xc5\x9c",
1748
      "\xc5\x9b"         => "\xc5\x9a",
1749
      "\xc5\x99"         => "\xc5\x98",
1750
      "\xc5\x97"         => "\xc5\x96",
1751
      "\xc5\x95"         => "\xc5\x94",
1752
      "\xc5\x93"         => "\xc5\x92",
1753
      "\xc5\x91"         => "\xc5\x90",
1754
      "\xc5\x8f"         => "\xc5\x8e",
1755
      "\xc5\x8d"         => "\xc5\x8c",
1756
      "\xc5\x8b"         => "\xc5\x8a",
1757
      "\xc5\x88"         => "\xc5\x87",
1758
      "\xc5\x86"         => "\xc5\x85",
1759
      "\xc5\x84"         => "\xc5\x83",
1760
      "\xc5\x82"         => "\xc5\x81",
1761
      "\xc5\x80"         => "\xc4\xbf",
1762
      "\xc4\xbe"         => "\xc4\xbd",
1763
      "\xc4\xbc"         => "\xc4\xbb",
1764
      "\xc4\xba"         => "\xc4\xb9",
1765
      "\xc4\xb7"         => "\xc4\xb6",
1766
      "\xc4\xb5"         => "\xc4\xb4",
1767
      "\xc4\xb3"         => "\xc4\xb2",
1768
      "\xc4\xb1"         => "\x49",
1769
      "\xc4\xaf"         => "\xc4\xae",
1770
      "\xc4\xad"         => "\xc4\xac",
1771
      "\xc4\xab"         => "\xc4\xaa",
1772
      "\xc4\xa9"         => "\xc4\xa8",
1773
      "\xc4\xa7"         => "\xc4\xa6",
1774
      "\xc4\xa5"         => "\xc4\xa4",
1775
      "\xc4\xa3"         => "\xc4\xa2",
1776
      "\xc4\xa1"         => "\xc4\xa0",
1777
      "\xc4\x9f"         => "\xc4\x9e",
1778
      "\xc4\x9d"         => "\xc4\x9c",
1779
      "\xc4\x9b"         => "\xc4\x9a",
1780
      "\xc4\x99"         => "\xc4\x98",
1781
      "\xc4\x97"         => "\xc4\x96",
1782
      "\xc4\x95"         => "\xc4\x94",
1783
      "\xc4\x93"         => "\xc4\x92",
1784
      "\xc4\x91"         => "\xc4\x90",
1785
      "\xc4\x8f"         => "\xc4\x8e",
1786
      "\xc4\x8d"         => "\xc4\x8c",
1787
      "\xc4\x8b"         => "\xc4\x8a",
1788
      "\xc4\x89"         => "\xc4\x88",
1789
      "\xc4\x87"         => "\xc4\x86",
1790
      "\xc4\x85"         => "\xc4\x84",
1791
      "\xc4\x83"         => "\xc4\x82",
1792
      "\xc4\x81"         => "\xc4\x80",
1793
      "\xc3\xbf"         => "\xc5\xb8",
1794
      "\xc3\xbe"         => "\xc3\x9e",
1795
      "\xc3\xbd"         => "\xc3\x9d",
1796
      "\xc3\xbc"         => "\xc3\x9c",
1797
      "\xc3\xbb"         => "\xc3\x9b",
1798
      "\xc3\xba"         => "\xc3\x9a",
1799
      "\xc3\xb9"         => "\xc3\x99",
1800
      "\xc3\xb8"         => "\xc3\x98",
1801
      "\xc3\xb6"         => "\xc3\x96",
1802
      "\xc3\xb5"         => "\xc3\x95",
1803
      "\xc3\xb4"         => "\xc3\x94",
1804
      "\xc3\xb3"         => "\xc3\x93",
1805
      "\xc3\xb2"         => "\xc3\x92",
1806
      "\xc3\xb1"         => "\xc3\x91",
1807
      "\xc3\xb0"         => "\xc3\x90",
1808
      "\xc3\xaf"         => "\xc3\x8f",
1809
      "\xc3\xae"         => "\xc3\x8e",
1810
      "\xc3\xad"         => "\xc3\x8d",
1811
      "\xc3\xac"         => "\xc3\x8c",
1812
      "\xc3\xab"         => "\xc3\x8b",
1813
      "\xc3\xaa"         => "\xc3\x8a",
1814
      "\xc3\xa9"         => "\xc3\x89",
1815
      "\xc3\xa8"         => "\xc3\x88",
1816
      "\xc3\xa7"         => "\xc3\x87",
1817
      "\xc3\xa6"         => "\xc3\x86",
1818
      "\xc3\xa5"         => "\xc3\x85",
1819
      "\xc3\xa4"         => "\xc3\x84",
1820
      "\xc3\xa3"         => "\xc3\x83",
1821
      "\xc3\xa2"         => "\xc3\x82",
1822
      "\xc3\xa1"         => "\xc3\x81",
1823
      "\xc3\xa0"         => "\xc3\x80",
1824
      "\xc2\xb5"         => "\xce\x9c",
1825
      "\x7a"             => "\x5a",
1826
      "\x79"             => "\x59",
1827
      "\x78"             => "\x58",
1828
      "\x77"             => "\x57",
1829
      "\x76"             => "\x56",
1830
      "\x75"             => "\x55",
1831
      "\x74"             => "\x54",
1832
      "\x73"             => "\x53",
1833
      "\x72"             => "\x52",
1834
      "\x71"             => "\x51",
1835
      "\x70"             => "\x50",
1836
      "\x6f"             => "\x4f",
1837
      "\x6e"             => "\x4e",
1838
      "\x6d"             => "\x4d",
1839
      "\x6c"             => "\x4c",
1840
      "\x6b"             => "\x4b",
1841
      "\x6a"             => "\x4a",
1842
      "\x69"             => "\x49",
1843
      "\x68"             => "\x48",
1844
      "\x67"             => "\x47",
1845
      "\x66"             => "\x46",
1846
      "\x65"             => "\x45",
1847
      "\x64"             => "\x44",
1848
      "\x63"             => "\x43",
1849
      "\x62"             => "\x42",
1850
      "\x61"             => "\x41",
1851
1852
    );
1853
1854
    return $case;
1855
  }
1856
1857
  /**
1858
   * check for UTF8-Support
1859
   */
1860
  public static function checkForSupport()
  {
    // The 'mbstring' entry doubles as the "already probed" marker: once it is
    // set, none of the detection helpers below are called again.
    if (isset(self::$support['mbstring'])) {
      return;
    }

    // Probe each optional capability once and remember the answer.
    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1871
1872
  /**
1873
   * Generates a UTF-8 encoded character from the given code point.
1874
   *
1875
   * @param    int $code_point The code point for which to generate a character.
1876
   *
1877
   * @return   string Multi-Byte character, returns empty string on failure to encode.
1878
   */
1879 8
  public static function chr($code_point)
  {
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      // IntlChar::chr() yields null when it cannot encode the value; map that
      // to '' so the documented "empty string on failure" contract also holds
      // on this code path.
      $char = \IntlChar::chr($code_point);

      return $char === null ? '' : $char;
    }

    // Fallback path: work on an integer code point.
    $i = (int)$code_point;

    if ($i !== $code_point) {
      // The input was not a plain integer, so let hex_to_int() interpret the
      // raw value (presumably a hexadecimal notation -- confirm in hex_to_int()).
      $i = (int)self::hex_to_int($code_point);
    }

    // 0 (or anything that could not be converted) counts as a failure.
    if (!$i) {
      return '';
    }

    // Build a numeric entity and let html_entity_decode() produce the character.
    return self::html_entity_decode("&#{$i};", ENT_QUOTES);
  }
1899
1900
  /**
1901
   * Applies callback to all characters of a string.
1902 1
   *
1903
   * @param    string $callback The callback function.
1904 1
   * @param    string $str      UTF-8 string to run callback on.
1905
   *
1906 1
   * @return   array The outcome of callback.
1907
   */
1908
1909
  public static function chr_map($callback, $str)
  {
    // Feed every element returned by split() through the callback and hand
    // back the collected results.
    return array_map($callback, self::split($str));
  }
1915
1916
  /**
1917
   * Generates an array of byte length of each character of a Unicode string.
1918
   *
1919
   * 1 byte => U+0000  - U+007F
1920
   * 2 byte => U+0080  - U+07FF
1921 2
   * 3 byte => U+0800  - U+FFFF
1922
   * 4 byte => U+10000 - U+10FFFF
1923 2
   *
1924 2
   * @param    string $str The original Unicode string.
1925
   *
1926
   * @return   array An array of byte lengths of each character.
1927 2
   */
1928
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Test for emptiness via the first byte instead of truthiness, so that
    // the string "0" is measured like any other one-character string.
    if (!isset($str[0])) {
      return array();
    }

    // strlen() counts bytes, so each entry is the byte length of one element
    // returned by split().
    return array_map('strlen', self::split($str));
  }
1936
1937 2
  /**
1938
   * Get a decimal code representation of a specific character.
1939 2
   *
1940 2
   * @param   string $chr The input character
1941 2
   *
1942
   * @return  int
1943 2
   */
1944
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;
    $code = self::ord($chr[0]);

    // 0xxxxxxx: high bit is clear, the lead value already is the result
    if (($code & 0x80) === 0) {
      return $code;
    }

    // Lead-byte layouts as (mask, expected marker bits, sequence length):
    //   110xxxxx => 2 bytes, 1110xxxx => 3 bytes, 11110xxx => 4 bytes
    $layouts = array(
        array(0xe0, 0xc0, 2),
        array(0xf0, 0xe0, 3),
        array(0xf8, 0xf0, 4),
    );

    // If no layout matches, the length stays 1 and the lead value is returned as-is.
    $length = 1;
    foreach ($layouts as $layout) {
      if (($code & $layout[0]) === $layout[1]) {
        $length = $layout[2];
        // strip the marker bits from the lead value
        $code &= ~$layout[1];
        break;
      }
    }

    // 10xxxxxx: shift in the payload of every following byte
    for ($offset = 1; $offset < $length; ++$offset) {
      $code = ($code << 6) + (self::ord($chr[$offset]) & ~0x80);
    }

    return $code;
  }
1976
1977
  /**
1978
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
1979
   *
1980
   * @param    string $chr The input character
1981
   * @param    string $pfix
1982
   *
1983
   * @return   string The code point encoded as U+xxxx
1984
   */
1985
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    // resolve the character to its numeric value first ...
    $codePoint = self::ord($chr);

    // ... then let int_to_hex() render it together with the requested prefix
    return self::int_to_hex($codePoint, $pfix);
  }
1989
1990
  /**
1991
   * Splits a string into smaller chunks and multiple lines, using the specified
1992
   * line ending character.
1993 1
   *
1994
   * @param    string $body     The original string to be split.
1995 1
   * @param    int    $chunklen The maximum character length of a chunk.
1996
   * @param    string $end      The character(s) to be inserted at the end of each chunk.
1997
   *
1998
   * @return   string The chunked string
1999
   */
2000
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    // $end is used purely as glue between chunks, so nothing is appended
    // after the last one.
    return implode($end, $chunks);
  }
2004
2005
  /**
2006
   * accepts a string and removes all non-UTF-8 characters from it.
2007
   *
2008
   * @param string $str                     The string to be sanitized.
2009 35
   * @param bool   $remove_bom
2010
   * @param bool   $normalize_whitespace
2011
   * @param bool   $normalize_msword        e.g.: "…" => "..."
2012
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
2013
   *
2014
   * @return string Clean UTF-8 encoded string
2015
   */
2016
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // Pattern background:
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // (that version caused connection reset problems on larger strings)
    //
    // Group 1 captures runs of byte sequences shaped like UTF-8 characters;
    // the other two alternatives match a single stray byte. Replacing every
    // match with '$1' keeps the runs and drops the stray bytes.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($pattern, '$1', $str);

    // always applied: drop replacement characters, then invisible characters
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    // optional steps, each enabled only by a strict boolean true
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
2051
2052 3
  /**
2053
   * Clean up a string and show only printable UTF-8 chars at the end.
2054 3
   *
2055
   * @param string $str
2056 3
   *
2057 1
   * @return string
2058
   */
2059
  public static function cleanup($str)
  {
    $str = (string)$str;

    // nothing to do for an empty string
    if ($str === '') {
      return '';
    }

    // first repair simple ISO <-> UTF-8 mix-ups
    $repaired = self::fix_simple_utf8($str);

    // then run clean() with: BOM removal on, whitespace normalisation on,
    // MS Word normalisation off, non-breaking spaces kept. clean() also
    // drops non-UTF-8 bytes, the replacement character and invisible
    // characters (e.g. "\0").
    return (string)self::clean($repaired, true, true, false, true);
  }
2079
2080
  /**
2081
   * Accepts a string and returns an array of Unicode code points.
2082 3
   *
2083
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings.
2084 3
   * @param    bool  $u_style If True, will return code points in U+xxxx format,
2085 3
   *                          default, code points will be returned as integers.
2086 3
   *
2087
   * @return   array The array of code points
2088 3
   */
2089
  public static function codepoints($arg, $u_style = false)
  {
    // both callbacks are static methods of this class
    $class = '\\voku\\helper\\UTF8';

    // a plain string is split first; anything else is mapped as given
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $points = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    // U+xxxx style requested: convert every code point via int_to_hex()
    return array_map(array($class, 'int_to_hex'), $points);
  }
2115
2116
  /**
2117 3
   * Returns count of characters used in a string.
2118
   *
2119 3
   * @param    string $str The input string.
2120
   *
2121 3
   * @return   array An associative array of Character as keys and
2122
   *           their count as values.
2123 3
   */
2124
  public static function count_chars($str) // there is no $mode parameters
  {
    $chars = self::split($str);

    // tally the occurrences of each character ...
    $counts = array_count_values($chars);

    // ... and order the tally by character (the array key)
    ksort($counts);

    return $counts;
  }
2132
2133 1
  /**
2134
   * Get a UTF-8 character from its decimal code representation.
2135 1
   *
2136
   * @param   int $code Code.
2137 1
   *
2138 1
   * @return  string
2139 1
   */
2140
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    // express the code as a hexadecimal HTML entity, e.g. 8364 => "&#x20ac;"
    $entity = '&#x' . dechex($code) . ';';

    // decode that entity into the actual UTF-8 character
    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2150
2151
  /**
2152
   * encode a string
2153
   *
2154
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
2155 11
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
2156
   *
2157 11
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
2158
   * @param string $str      the string
2159 11
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
2160 11
   *                         otherwise we auto-detect the current string-encoding
2161
   *
2162
   * @return string
2163 1
   */
2164 1
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // both the string and the target encoding must be non-empty
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // without a detected source encoding there is nothing to convert from
    if (!$encodingDetected) {
      return $str;
    }

    // unless forced, a string that already has the target encoding is left alone
    $forced = ($force === true);
    if (!$forced && $encodingDetected === $encoding) {
      return $str;
    }

    self::checkForSupport();

    // target UTF-8: handled by to_utf8() when forced or for these source encodings
    if (
        $encoding === 'UTF-8'
        &&
        (
            $forced
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // target ISO-8859-1: handled by to_win1252() when forced or for these source encodings
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $forced
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_win1252($str);
    }

    // every other combination goes through mbstring
    $converted = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // an empty / failed conversion falls back to the unchanged input
    return $converted ? $converted : $str;
  }
2225
2226
  /**
2227
   * Callback function for preg_replace_callback use.
2228
   *
2229
   * @param  array $matches PREG matches
2230
   *
2231
   * @return string
2232
   */
2233
  protected static function entityCallback($matches)
  {
    self::checkForSupport();

    // decode the whole match (index 0) from HTML entities to UTF-8
    $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    // a decoded apostrophe is handed back as a hex entity instead of the raw character
    return $decoded === "'" ? '&#x27;' : $decoded;
  }
2245
2246
  /**
2247
   * Reads entire file into a string.
2248
   *
2249
   * WARNING: do not use the UTF-8 option for binary files (e.g.: images) !!!
2250
   *
2251
   * @link http://php.net/manual/en/function.file-get-contents.php
2252 2
   *
2253
   * @param string   $filename      <p>
2254
   *                                Name of the file to read.
2255 2
   *                                </p>
2256 2
   * @param int      $flags         [optional] <p>
2257
   *                                Prior to PHP 6, this parameter is called
2258 2
   *                                use_include_path and is a bool.
2259 2
   *                                As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
2260
   *                                to trigger include path
2261
   *                                search.
2262
   *                                </p>
2263 2
   *                                <p>
2264 2
   *                                The value of flags can be any combination of
2265
   *                                the following flags (with some restrictions), joined with the
2266 2
   *                                binary OR (|)
2267 2
   *                                operator.
2268
   *                                </p>
2269 2
   *                                <p>
2270 1
   *                                <table>
2271 1
   *                                Available flags
2272 2
   *                                <tr valign="top">
2273
   *                                <td>Flag</td>
2274
   *                                <td>Description</td>
2275
   *                                </tr>
2276 2
   *                                <tr valign="top">
2277
   *                                <td>
2278
   *                                FILE_USE_INCLUDE_PATH
2279
   *                                </td>
2280 2
   *                                <td>
2281 2
   *                                Search for filename in the include directory.
2282
   *                                See include_path for more
2283 2
   *                                information.
2284
   *                                </td>
2285 2
   *                                </tr>
2286 1
   *                                <tr valign="top">
2287 1
   *                                <td>
2288 1
   *                                FILE_TEXT
2289 1
   *                                </td>
2290 1
   *                                <td>
2291 1
   *                                As of PHP 6, the default encoding of the read
2292
   *                                data is UTF-8. You can specify a different encoding by creating a
2293 2
   *                                custom context or by changing the default using
2294 2
   *                                stream_default_encoding. This flag cannot be
2295 2
   *                                used with FILE_BINARY.
2296 2
   *                                </td>
2297
   *                                </tr>
2298
   *                                <tr valign="top">
2299 2
   *                                <td>
2300
   *                                FILE_BINARY
2301
   *                                </td>
2302
   *                                <td>
2303
   *                                With this flag, the file is read in binary mode. This is the default
2304
   *                                setting and cannot be used with FILE_TEXT.
2305
   *                                </td>
2306
   *                                </tr>
2307
   *                                </table>
2308
   *                                </p>
2309 1
   * @param resource $context       [optional] <p>
2310
   *                                A valid context resource created with
2311 1
   *                                stream_context_create. If you don't need to use a
2312
   *                                custom context, you can skip this parameter by &null;.
2313
   *                                </p>
2314
   * @param int      $offset        [optional] <p>
2315
   *                                The offset where the reading starts.
2316
   *                                </p>
2317
   * @param int      $maxlen        [optional] <p>
2318
   *                                Maximum length of data read. The default is to read until end
2319
   *                                of file is reached.
2320
   *                                </p>
2321
   * @param int      $timeout
2322
   *
2323 7
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non
2324
   *                                default utf-8 chars
2325 7
   *
2326 7
   * @return string|false The function returns the read data or false on failure.
2327 2
   */
2328
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;

    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and
    // alters quotes / tag-like parts of the path; kept unchanged here so
    // existing callers see the same file name handling.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // The native function takes a bool and an int here. This wrapper defaults
    // both to null, so normalise them explicitly instead of relying on the
    // implicit null coercion (deprecated for internal functions since PHP 8.1).
    $use_include_path = (bool)$flags;
    $offset = (int)$offset;

    // Only build a context when the caller did not supply one; it carries
    // nothing but the HTTP timeout.
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The length argument is passed only when it is a real integer.
    if (is_int($maxlen)) {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxlen);
    } else {
      $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // convert to UTF-8 based on the detected encoding (not forced), then clean up
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
2366
2367
  /**
   * Checks if a file starts with BOM character.
   *
   * Only the first three bytes of the file are read; they are handed to
   * self::is_bom() for the actual check.
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (also when the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    // Read from offset 0: since PHP 7.1 a negative offset counts from the end
    // of the stream, so the former "-1" fetched the last byte of the file
    // instead of its beginning.
    $head = file_get_contents($file_path, false, null, 0, 3);

    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
2378
2379
  /**
   * Normalizes a value to UTF-8 (NFC by default).
   *
   * Arrays and objects are processed recursively, member by member. Strings get
   * their line endings unified to "\n"; if they contain bytes >= 0x80 they are
   * normalized with the Normalizer class, and when that yields nothing usable
   * the string is passed to self::encode('UTF-8', ...) instead. Values of any
   * other type are returned unchanged.
   *
   * @param mixed  $var                The value to filter.
   * @param int    $normalization_form One of the Normalizer form constants.
   * @param string $leading_combining  Prefix that is put in front of a string
   *                                   starting with a combining mark; pass an
   *                                   empty string to disable this.
   *
   * @return mixed The filtered value (same type as the input).
   */
  public static function filter($var, $normalization_form = \Normalizer::NFC, $leading_combining = '◌')
  {
    // The default uses the named constant on purpose: the numeric value of
    // Normalizer::NFC is not the same across PHP / intl versions, so a literal
    // "4" does not reliably mean NFC.
    switch (gettype($var)) {
      case 'array':
        foreach ($var as $k => $v) {
          /** @noinspection AlterInForeachInspection */
          $var[$k] = self::filter($v, $normalization_form, $leading_combining);
        }
        break;
      case 'object':
        foreach ($var as $k => $v) {
          $var->$k = self::filter($v, $normalization_form, $leading_combining);
        }
        break;
      case 'string':
        if (false !== strpos($var, "\r")) {
          // Workaround https://bugs.php.net/65732
          $var = str_replace(array("\r\n", "\r"), "\n", $var);
        }

        // Pure ASCII needs no normalization at all.
        if (preg_match('/[\x80-\xFF]/', $var)) {
          if (\Normalizer::isNormalized($var, $normalization_form)) {
            // non-empty marker: "the string is in normalized form"
            $n = '-';
          } else {
            $n = \Normalizer::normalize($var, $normalization_form);

            if (isset($n[0])) {
              $var = $n;
            } else {
              // normalization produced nothing usable, re-encode instead
              $var = self::encode('UTF-8', $var);
            }
          }

          // isset($var[0]) protects the offset access in case the re-encoding
          // above returned an empty string.
          if (isset($var[0]) && $var[0] >= "\x80" && isset($n[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
          }
        }
        break;
    }

    return $var;
  }
2431
2432
  /**
   * "filter_input()"-wrapper whose result is passed through self::filter().
   *
   * @param int    $type   Input type, as expected by the native filter_input().
   * @param string $var    Name of the variable to fetch.
   * @param int    $filter Id of the filter to apply.
   * @param mixed  $option Filter options; only forwarded when it was given.
   *
   * @return mixed The result of filter_input(), run through self::filter().
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only if the caller really passed a fourth argument.
    $hasOption = func_num_args() >= 4;

    $value = $hasOption
        ? filter_input($type, $var, $filter, $option)
        : filter_input($type, $var, $filter);

    return self::filter($value);
  }
2452
2453
  /**
   * "filter_input_array()"-wrapper whose result is passed through self::filter().
   *
   * @param int   $type       Input type, as expected by the native filter_input_array().
   * @param mixed $definition Filter definition; only forwarded when it was given.
   * @param bool  $add_empty  Forwarded together with $definition.
   *
   * @return mixed The result of filter_input_array(), run through self::filter().
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults apply; otherwise hand over
    // both optional arguments as received.
    $hasDefinition = func_num_args() >= 2;

    $values = $hasDefinition
        ? filter_input_array($type, $definition, $add_empty)
        : filter_input_array($type);

    return self::filter($values);
  }
2472 8
2473
  /**
   * "filter_var()"-wrapper whose result is passed through self::filter().
   *
   * @param mixed $var    The value to filter.
   * @param int   $filter Id of the filter to apply.
   * @param mixed $option Filter options; only forwarded when it was given.
   *
   * @return mixed The result of filter_var(), run through self::filter().
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only if the caller really passed a third argument.
    $hasOption = func_num_args() >= 3;

    $value = $hasOption
        ? filter_var($var, $filter, $option)
        : filter_var($var, $filter);

    return self::filter($value);
  }
2492
2493
  /**
   * "filter_var_array()"-wrapper whose result is passed through self::filter().
   *
   * @param array $data       The values to filter.
   * @param mixed $definition Filter definition; only forwarded when it was given.
   * @param bool  $add_empty  Forwarded together with $definition.
   *
   * @return mixed The result of filter_var_array(), run through self::filter().
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults apply; otherwise hand over
    // both optional arguments as received.
    $hasDefinition = func_num_args() >= 2;

    $values = $hasDefinition
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    return self::filter($values);
  }
2512
2513 1
  /**
   * Tells whether a string has at most the given number of characters.
   *
   * The length is measured with self::strlen().
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The size in number of chars to be checked against string.
   *
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $charCount = self::strlen($str);

    return $box_size >= $charCount;
  }
2526
2527 1
  /**
   * Repairs a broken UTF-8 string by plain search-and-replace.
   *
   * Every key of self::$brokenUtf8ToUtf8 found in the input is replaced with
   * its mapped value.
   *
   * @param string $str The string to repair (cast to string first).
   *
   * @return string The repaired string; '' for empty input.
   */
  public static function fix_simple_utf8($str)
  {
    // search / replace lists, derived from the mapping on first use only
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if (null === $search) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2552
2553
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * The value is run through self::utf8_decode() and self::to_utf8() again and
   * again until a round no longer changes it. Arrays are handled element by
   * element (recursively), keeping their keys.
   *
   * @param array|string $str
   *
   * @return array|string The fixed string, or the array of fixed elements.
   */
  public static function fix_utf8($str)
  {
    if (!is_array($str)) {
      $previous = '';

      // repeat until a round-trip leaves the value as it was
      while ($previous !== $str) {
        $previous = $str;
        $str = self::to_utf8(self::utf8_decode($str));
      }

      return $str;
    }

    foreach ($str as $key => $element) {
      /** @noinspection AlterInForeachInspection */
      $str[$key] = self::fix_utf8($element);
    }

    return $str;
  }
2580
2581
  /**
   * Get the text direction of a specific character.
   *
   * If self::$support['intlChar'] is enabled, IntlChar::charDirection() is
   * asked first. When that gives no decision (or is not available) the code
   * point from chr_to_decimal() is looked up in a built-in table of
   * right-to-left code points.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction classes, from "IntlChar"-Class
      $ltrClasses = array(0, 11, 12, 20);
      $rtlClasses = array(1, 13, 14, 15, 21);

      if (in_array($direction, $ltrClasses, true)) {
        return 'LTR';
      }

      if (in_array($direction, $rtlClasses, true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // everything outside of U+05BE .. U+10B7F counts as left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // single right-to-left code points (compared strictly)
    $rtlCodePoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    if (in_array($c, $rtlCodePoints, true)) {
      return 'RTL';
    }

    // inclusive right-to-left ranges: array(first, last)
    $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2699
2700
  /**
   * Loads a data file from "/data/*.php" (relative to this file's directory).
   *
   * The file is pulled in with "require", so the result is whatever that file
   * returns.
   *
   * @param string $file Name of the data file, without directory and extension.
   *
   * @return bool|string|array|int false if the file does not exist
   */
  protected static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2717 1
2718 1
  /**
   * Creates a random string of UTF-8 characters.
   *
   * The alphabet is built once and cached: with PCRE UTF-8 support it consists
   * of the digits and letters among the code points 48..79, otherwise of
   * 0-9, A-Z and a-z. Characters are picked with mt_rand(), so the result is
   * not suitable for anything security related.
   *
   * @param    int $len The length of string in characters.
   *
   * @return   string String consisting of random characters; '' if $len is not positive.
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // Work on a real integer: a fractional (or non-numeric) length would never
    // reach exactly zero while counting and the loop would not terminate.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (!$chars) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // blank out everything that is neither a number nor a letter ...
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // ... and drop the blanked entries. "strlen" is used as the test
        // because a plain array_filter() would also throw away the valid
        // character "0" (it is falsy).
        $chars = array_values(array_filter($chars, 'strlen'));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for ($i = 0; $i < $len; ++$i) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
2765
2766
  /**
   * Converts hexadecimal U+xxxx code point representation to Integer.
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * Accepted are 4 to 6 hexadecimal digits, optionally prefixed with "\u" or
   * "U+" (matched case-insensitively).
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits are allowed: with a wider character class a value
    // like "12gh" would pass and intval() would silently convert just "12".
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2783
2784
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * e.g.: &#123;&#39;&#1740;
   *
   * Uses mb_encode_numericentity() when available, otherwise every character
   * from self::split() is encoded with self::single_chr_html_encode().
   *
   * @param  string $str The Unicode string to be encoded as numbered entities.
   * @param    bool   $keepAsciiChars Keep ASCII chars.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false)
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point to convert: skip 0x00-0x7f if ASCII is to be kept
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      return mb_encode_numericentity(
          $str,
          // convmap: (start, end, offset, mask). The range reaches up to
          // U+10FFFF so that characters outside of the basic multilingual
          // plane are converted as well; the mask covers all 21 bits of
          // a code point.
          array($startCode, 0x10ffff, 0, 0x1fffff,),
          self::str_detect_encoding($str)
      );
    }

    return implode(
        array_map(
            function($data) use ($keepAsciiChars) { return self::single_chr_html_encode($data, $keepAsciiChars); },
            self::split($str)
        )
    );
  }
2818 15
2819
  /**
2820 15
   * UTF-8 version of html_entity_decode()
2821
   *
2822 15
   * The reason we are not using html_entity_decode() by itself is because
2823
   * while it is not technically correct to leave out the semicolon
2824 15
   * at the end of an entity most browsers will still interpret the entity
2825
   * correctly. html_entity_decode() does not convert entities without
2826
   * semicolons, so we are left with our own little solution here. Bummer.
2827
   *
2828
   * Convert all HTML entities to their applicable characters
2829
   *
2830
   * @link http://php.net/manual/en/function.html-entity-decode.php
2831
   *
2832
   * @param string $str      <p>
2833
   *                         The input string.
2834 12
   *                         </p>
2835
   * @param int    $flags    [optional] <p>
2836 12
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2837
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2838 12
   *                         <table>
2839
   *                         Available <i>flags</i> constants
2840 12
   *                         <tr valign="top">
2841 5
   *                         <td>Constant Name</td>
2842
   *                         <td>Description</td>
2843
   *                         </tr>
2844 11
   *                         <tr valign="top">
2845
   *                         <td><b>ENT_COMPAT</b></td>
2846
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2847
   *                         </tr>
2848
   *                         <tr valign="top">
2849
   *                         <td><b>ENT_QUOTES</b></td>
2850
   *                         <td>Will convert both double and single quotes.</td>
2851
   *                         </tr>
2852
   *                         <tr valign="top">
2853
   *                         <td><b>ENT_NOQUOTES</b></td>
2854
   *                         <td>Will leave both double and single quotes unconverted.</td>
2855
   *                         </tr>
2856
   *                         <tr valign="top">
2857
   *                         <td><b>ENT_HTML401</b></td>
2858
   *                         <td>
2859
   *                         Handle code as HTML 4.01.
2860
   *                         </td>
2861
   *                         </tr>
2862
   *                         <tr valign="top">
2863
   *                         <td><b>ENT_XML1</b></td>
2864
   *                         <td>
2865
   *                         Handle code as XML 1.
2866
   *                         </td>
2867
   *                         </tr>
2868
   *                         <tr valign="top">
2869
   *                         <td><b>ENT_XHTML</b></td>
2870
   *                         <td>
2871
   *                         Handle code as XHTML.
2872
   *                         </td>
2873
   *                         </tr>
2874
   *                         <tr valign="top">
2875
   *                         <td><b>ENT_HTML5</b></td>
2876
   *                         <td>
2877
   *                         Handle code as HTML 5.
2878
   *                         </td>
2879
   *                         </tr>
2880
   *                         </table>
2881
   *                         </p>
2882
   * @param string $encoding [optional] <p>
2883
   *                         Encoding to use.
2884
   *                         </p>
2885
   *
2886
   * @return string the decoded string.
2887
   */
2888
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to do for empty input ...
    if ($str === '') {
      return '';
    }

    // ... or for input without any entity marker
    if (false === strpos($str, '&')) {
      return $str;
    }

    // default flags depend on the PHP version reported by Bootup::is_php()
    if (null === $flags) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // decode repeatedly until a pass no longer changes the string
    do {
      $previous = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", array('\voku\helper\UTF8', 'entityCallback'), $str);

      // decode numeric & UTF16 two byte entities:
      // first append the ";" to numeric entities that come without one
      $terminated = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      $str = html_entity_decode($terminated, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2924
2925
  /**
2926
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2927
   *
2928
   * @link http://php.net/manual/en/function.htmlentities.php
2929
   *
2930
   * @param string $str           <p>
2931
   *                              The input string.
2932
   *                              </p>
2933
   * @param int    $flags         [optional] <p>
2934
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2935
   *                              invalid code unit sequences and the used document type. The default is
2936
   *                              ENT_COMPAT | ENT_HTML401.
2937
   *                              <table>
2938
   *                              Available <i>flags</i> constants
2939
   *                              <tr valign="top">
2940
   *                              <td>Constant Name</td>
2941
   *                              <td>Description</td>
2942
   *                              </tr>
2943
   *                              <tr valign="top">
2944
   *                              <td><b>ENT_COMPAT</b></td>
2945
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2946
   *                              </tr>
2947
   *                              <tr valign="top">
2948
   *                              <td><b>ENT_QUOTES</b></td>
2949
   *                              <td>Will convert both double and single quotes.</td>
2950 2
   *                              </tr>
2951
   *                              <tr valign="top">
2952 2
   *                              <td><b>ENT_NOQUOTES</b></td>
2953
   *                              <td>Will leave both double and single quotes unconverted.</td>
2954
   *                              </tr>
2955
   *                              <tr valign="top">
2956
   *                              <td><b>ENT_IGNORE</b></td>
2957
   *                              <td>
2958
   *                              Silently discard invalid code unit sequences instead of returning
2959
   *                              an empty string. Using this flag is discouraged as it
2960
   *                              may have security implications.
2961
   *                              </td>
2962
   *                              </tr>
2963
   *                              <tr valign="top">
2964
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2965
   *                              <td>
2966
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2967
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2968
   *                              </td>
2969
   *                              </tr>
2970
   *                              <tr valign="top">
2971
   *                              <td><b>ENT_DISALLOWED</b></td>
2972
   *                              <td>
2973
   *                              Replace invalid code points for the given document type with a
2974
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2975
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2976
   *                              instance, to ensure the well-formedness of XML documents with
2977
   *                              embedded external content.
2978
   *                              </td>
2979
   *                              </tr>
2980
   *                              <tr valign="top">
2981
   *                              <td><b>ENT_HTML401</b></td>
2982
   *                              <td>
2983
   *                              Handle code as HTML 4.01.
2984
   *                              </td>
2985
   *                              </tr>
2986
   *                              <tr valign="top">
2987
   *                              <td><b>ENT_XML1</b></td>
2988
   *                              <td>
2989
   *                              Handle code as XML 1.
2990
   *                              </td>
2991
   *                              </tr>
2992
   *                              <tr valign="top">
2993
   *                              <td><b>ENT_XHTML</b></td>
2994
   *                              <td>
2995
   *                              Handle code as XHTML.
2996
   *                              </td>
2997
   *                              </tr>
2998
   *                              <tr valign="top">
2999
   *                              <td><b>ENT_HTML5</b></td>
3000
   *                              <td>
3001
   *                              Handle code as HTML 5.
3002
   *                              </td>
3003
   *                              </tr>
3004
   *                              </table>
3005
   *                              </p>
3006
   * @param string $encoding      [optional] <p>
3007
   *                              Like <b>htmlspecialchars</b>,
3008
   *                              <b>htmlentities</b> takes an optional third argument
3009
   *                              <i>encoding</i> which defines encoding used in
3010
   *                              conversion.
3011
   *                              Although this argument is technically optional, you are highly
3012
   *                              encouraged to specify the correct value for your code.
3013
   *                              </p>
3014
   * @param bool   $double_encode [optional] <p>
3015
   *                              When <i>double_encode</i> is turned off PHP will not
3016
   *                              encode existing html entities. The default is to convert everything.
3017
   *                              </p>
3018
   *
3019
   *
3020
   * @return string the encoded string.
3021
   * </p>
3022
   * <p>
3023
   * If the input <i>string</i> contains an invalid code unit
3024
   * sequence within the given <i>encoding</i> an empty string
3025
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3026
   * <b>ENT_SUBSTITUTE</b> flags are set.
3027
   */
3028
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // plain pass-through to the native function, all four arguments as received
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
3032
3033
  /**
3034
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
3035
   *
3036
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3037
   *
3038
   * @param string $str           <p>
3039
   *                              The string being converted.
3040
   *                              </p>
3041
   * @param int    $flags         [optional] <p>
3042
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3043
   *                              invalid code unit sequences and the used document type. The default is
3044
   *                              ENT_COMPAT | ENT_HTML401.
3045
   *                              <table>
3046
   *                              Available <i>flags</i> constants
3047
   *                              <tr valign="top">
3048
   *                              <td>Constant Name</td>
3049
   *                              <td>Description</td>
3050
   *                              </tr>
3051
   *                              <tr valign="top">
3052
   *                              <td><b>ENT_COMPAT</b></td>
3053
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3054
   *                              </tr>
3055
   *                              <tr valign="top">
3056
   *                              <td><b>ENT_QUOTES</b></td>
3057
   *                              <td>Will convert both double and single quotes.</td>
3058
   *                              </tr>
3059
   *                              <tr valign="top">
3060
   *                              <td><b>ENT_NOQUOTES</b></td>
3061
   *                              <td>Will leave both double and single quotes unconverted.</td>
3062 1
   *                              </tr>
3063
   *                              <tr valign="top">
3064 1
   *                              <td><b>ENT_IGNORE</b></td>
3065
   *                              <td>
3066
   *                              Silently discard invalid code unit sequences instead of returning
3067
   *                              an empty string. Using this flag is discouraged as it
3068
   *                              may have security implications.
3069
   *                              </td>
3070
   *                              </tr>
3071
   *                              <tr valign="top">
3072 1
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3073
   *                              <td>
3074 1
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3075
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
3076
   *                              </td>
3077
   *                              </tr>
3078
   *                              <tr valign="top">
3079
   *                              <td><b>ENT_DISALLOWED</b></td>
3080
   *                              <td>
3081
   *                              Replace invalid code points for the given document type with a
3082
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3083
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3084
   *                              instance, to ensure the well-formedness of XML documents with
3085
   *                              embedded external content.
3086
   *                              </td>
3087
   *                              </tr>
3088
   *                              <tr valign="top">
3089
   *                              <td><b>ENT_HTML401</b></td>
3090
   *                              <td>
3091
   *                              Handle code as HTML 4.01.
3092
   *                              </td>
3093
   *                              </tr>
3094
   *                              <tr valign="top">
3095
   *                              <td><b>ENT_XML1</b></td>
3096
   *                              <td>
3097
   *                              Handle code as XML 1.
3098
   *                              </td>
3099
   *                              </tr>
3100
   *                              <tr valign="top">
3101
   *                              <td><b>ENT_XHTML</b></td>
3102
   *                              <td>
3103 1
   *                              Handle code as XHTML.
3104
   *                              </td>
3105 1
   *                              </tr>
3106
   *                              <tr valign="top">
3107
   *                              <td><b>ENT_HTML5</b></td>
3108
   *                              <td>
3109
   *                              Handle code as HTML 5.
3110
   *                              </td>
3111
   *                              </tr>
3112
   *                              </table>
3113
   *                              </p>
3114
   * @param string $encoding      [optional] <p>
3115 1
   *                              Defines encoding used in conversion.
3116
   *                              </p>
3117 1
   *                              <p>
3118
   *                              For the purposes of this function, the encodings
3119
   *                              ISO-8859-1, ISO-8859-15,
3120
   *                              UTF-8, cp866,
3121
   *                              cp1251, cp1252, and
3122
   *                              KOI8-R are effectively equivalent, provided the
3123
   *                              <i>string</i> itself is valid for the encoding, as
3124
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3125
   *                              the same positions in all of these encodings.
3126
   *                              </p>
3127 1
   * @param bool   $double_encode [optional] <p>
3128
   *                              When <i>double_encode</i> is turned off PHP will not
3129 1
   *                              encode existing html entities, the default is to convert everything.
3130
   *                              </p>
3131
   *
3132
   * @return string The converted string.
3133
   * </p>
3134
   * <p>
3135
   * If the input <i>string</i> contains an invalid code unit
3136
   * sequence within the given <i>encoding</i> an empty string
3137
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3138
   * <b>ENT_SUBSTITUTE</b> flags are set.
3139
   */
3140
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Straight pass-through to the native function; all four arguments are forwarded unchanged.
    $converted = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
3144
3145
  /**
3146
   * checks whether iconv is available on the server
3147
   *
3148
   * @return   bool True if available, False otherwise
3149
   */
3150
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean, so it can be compared directly.
    return extension_loaded('iconv') === true;
  }
3154
3155
  /**
3156
   * Converts Integer to hexadecimal U+xxxx code point representation.
3157
   *
3158
   * @param    int    $int The integer to be converted to hexadecimal code point.
3159
   * @param    string $pfix
3160
   *
3161
   * @return   string The code point, or empty string on failure.
3162
   */
3163
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain sequence of decimal digits is rejected.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits (e.g. 65 => "0041").
    $padded = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $padded;
  }
3175
3176
  /**
3177
   * checks whether intl is available on the server
3178
   *
3179 16
   * @return   bool True if available, False otherwise
3180
   */
3181 16
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean, so it can be compared directly.
    return extension_loaded('intl') === true;
  }
3185
3186
  /**
3187
   * checks whether intl-char is available on the server
3188
   *
3189
   * @return   bool True if available, False otherwise
3190
   */
3191
  public static function intlChar_loaded()
  {
    // Both conditions must hold: the PHP version check and the presence of the IntlChar class.
    // "&&" is used instead of "and" to avoid the low-precedence pitfalls of the word operator.
    return Bootup::is_php('7.0') === true && class_exists('IntlChar');
  }
3195
3196
  /**
3197
   * alias for "UTF8::is_ascii()"
3198
   *
3199
   * @param string $str
3200
   *
3201
   * @return boolean
3202
   */
3203
  public static function isAscii($str)
  {
    // camelCase alias: delegates to is_ascii() without touching the argument.
    $result = self::is_ascii($str);

    return $result;
  }
3207
3208 1
  /**
3209 1
   * alias for "UTF8::is_base64"
3210
   *
3211
   * @param string $str
3212 1
   *
3213 1
   * @return bool
3214
   */
3215 1
  public static function isBase64($str)
  {
    // camelCase alias: delegates to is_base64() without touching the argument.
    $result = self::is_base64($str);

    return $result;
  }
3219
3220
  /**
3221
   * alias for "UTF8::is_bom"
3222
   *
3223
   * @param string $utf8_chr
3224
   *
3225
   * @return boolean
3226 4
   */
3227
  public static function isBom($utf8_chr)
  {
    // camelCase alias: delegates to is_bom() without touching the argument.
    $result = self::is_bom($utf8_chr);

    return $result;
  }
3231
3232 4
  /**
3233
   * Try to check if a string is a json-string...
3234 4
   *
3235 4
   * @param $str
3236 4
   *
3237 4
   * @return bool
3238 3
   */
3239
  public static function isJson($str)
  {
    $str = (string)$str;

    // An empty string is never accepted.
    if ($str === '') {
      return false;
    }

    // Only input that decodes to an object counts; decoded arrays and scalars yield false.
    $decoded = json_decode($str);

    return is_object($decoded) && json_last_error() === JSON_ERROR_NONE;
  }
3257
3258
  /**
3259
   * check if string contains any html-tags <lall>
3260
   *
3261
   * @param string $str
3262
   *
3263
   * @return boolean
3264
   */
3265
  public static function isHtml($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Looks for one opening, closing or self-closing tag, optionally carrying attributes.
    $found = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str, $found);

    // A non-empty match array means at least one tag was found.
    return count($found) !== 0;
  }
3284
3285 2
  /**
3286
   * alias for "UTF8::is_utf8"
3287 2
   *
3288 2
   * @param string $str
3289
   *
3290 2
   * @return bool
3291 2
   */
3292 2
  public static function isUtf8($str)
  {
    // camelCase alias: delegates to is_utf8() without touching the argument.
    $result = self::is_utf8($str);

    return $result;
  }
3296 2
3297 2
  /**
3298 2
   * Checks if a string is 7 bit ASCII.
3299 1
   *
3300 1
   * @param    string $str The string to check.
3301 2
   *
3302 2
   * @return   bool <strong>true</strong> if it is ASCII<br />
3303 2
   *                <strong>false</strong> otherwise
3304
   */
3305 2
  public static function is_ascii($str)
  {
    // A single byte in the range 0x80-0xFF (high bit set) means the string is not 7-bit ASCII.
    $highBitFound = preg_match('/[\x80-\xFF]/', $str);

    return !$highBitFound;
  }
3309 2
3310 2
  /**
3311 2
   * Returns true if the string is base64 encoded, false otherwise.
3312 2
   *
3313 2
   * @param string $str
3314 1
   *
3315 1
   * @return bool Whether or not $str is base64 encoded
3316 2
   */
3317 2
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Strictly decode, re-encode, and require the round trip to reproduce the input exactly.
    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
3331
3332
  /**
3333
   * Check whether the input is binary (heuristic; this looks like a hack).
3334
   *
3335
   * @param string $input
3336
   *
3337
   * @return bool
3338
   */
3339
  public static function is_binary($input)
  {
    $byteCount = strlen($input);

    // A string made only of "0" and "1" characters is treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary.
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    // Nothing left to measure for empty input.
    if (!$byteCount) {
      return false;
    }

    // NOTE(review): substr_count() counts occurrences of the literal four-character
    // string "^ -~"; it is not evaluated as a character class. Presumably a count of
    // non-printable characters was intended -- confirm before changing.
    return substr_count($input, '^ -~') / $byteCount > 0.3;
  }
3356 2
3357 2
  /**
3358 2
   * Check if the file is binary.
3359
   *
3360 2
   * @param string $file
3361 2
   *
3362 2
   * @return boolean
3363 1
   */
3364 1
  public static function is_binary_file($file)
  {
    // Falls back to an empty block whenever the file cannot be opened or read.
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'r');

      // fopen() signals failure by returning false; never hand that to fread().
      if ($fp !== false) {
        $data = fread($fp, 512);

        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // Reached only if an exception is thrown while opening or reading.
      $block = '';
    }

    // Close the handle on every path, including after an exception during the read.
    if (is_resource($fp)) {
      fclose($fp);
    }

    // Only the first 512 bytes are inspected.
    return self::is_binary($block);
  }
3376
3377
  /**
3378
   * Checks if the given string is exactly "UTF8 - Byte Order Mark".
3379
   *
3380
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3381
   *
3382
   * @param    string $utf8_chr The input string.
3383 2
   *
3384
   * @return   bool True if the $utf8_chr is Byte Order Mark, False otherwise.
3385 2
   */
3386
  public static function is_bom($utf8_chr)
  {
    // Strict comparison: the argument must be exactly the value returned by bom().
    $bom = self::bom();

    return $utf8_chr === $bom;
  }
3390
3391
  /**
3392
   * Check if the string is UTF-16.
3393
   *
3394
   * @param string $str
3395
   *
3396
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3397 34
   */
3398 View Code Duplication
  public static function is_utf16($str)
  {
    // Only input that is_binary() classifies as binary is examined at all.
    if (self::is_binary($str)) {
      self::checkForSupport();

      $maybeUTF16LE = self::countEncodingRoundTripHits($str, 'UTF-16LE');
      $maybeUTF16BE = self::countEncodingRoundTripHits($str, 'UTF-16BE');

      // A tie (including 0:0) is inconclusive and falls through to false.
      if ($maybeUTF16BE !== $maybeUTF16LE) {
        return $maybeUTF16LE > $maybeUTF16BE ? 1 : 2;
      }
    }

    return false;
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    // Only input that is_binary() classifies as binary is examined at all.
    if (self::is_binary($str)) {
      self::checkForSupport();

      $maybeUTF32LE = self::countEncodingRoundTripHits($str, 'UTF-32LE');
      $maybeUTF32BE = self::countEncodingRoundTripHits($str, 'UTF-32BE');

      // A tie (including 0:0) is inconclusive and falls through to false.
      if ($maybeUTF32BE !== $maybeUTF32LE) {
        return $maybeUTF32LE > $maybeUTF32BE ? 1 : 2;
      }
    }

    return false;
  }

  /**
   * Scores how well $str fits the given encoding (shared by is_utf16() and is_utf32()).
   *
   * The input is converted to UTF-8, back to $encoding and to UTF-8 again. If the two
   * UTF-8 results are identical, every key of count_chars() for the converted text that
   * is strictly contained in count_chars() for the input adds one to the score.
   *
   * @param string $str      The raw input.
   * @param string $encoding The candidate encoding name as passed to mb_convert_encoding().
   *
   * @return int The score; 0 if the conversion failed, produced at most one byte, or did not round-trip.
   */
  protected static function countEncodingRoundTripHits($str, $encoding)
  {
    $hits = 0;

    $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if ($decoded === false || strlen($decoded) <= 1) {
      return $hits;
    }

    $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
    $reDecoded = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
    if ($reDecoded !== $decoded) {
      return $hits;
    }

    $strChars = self::count_chars($str);
    foreach (self::count_chars($reDecoded) as $char => $unused) {
      if (in_array($char, $strChars, true) === true) {
        $hits++;
      }
    }

    return $hits;
  }
3500 28
3501 5
  /**
3502
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3503
   *
3504 28
   * @see    http://hsivonen.iki.fi/php-utf8/
3505 28
   *
3506 28
   * @param    string $str The string to be checked.
3507 28
   *
3508 28
   * @return   bool
3509
   */
3510
  public static function is_utf8($str)
  {
    $str = (string)$str;

    // The empty string is treated as valid UTF-8.
    if (!isset($str[0])) {
      return true;
    }

    // NOTE(review): this condition looks inverted -- the "/u" regex is used when
    // pcre_utf8_support() does *not* report true. Left unchanged; confirm against
    // the implementation of pcre_utf8_support().
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0; // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Outside a sequence: expect a US-ASCII octet or the first octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence. Always illegal, but instead of
          // resynchronizing we read on to the end of the sequence and let the
          // "4 < $mBytes" check below reject it.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a sequence: only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      // Merge the six payload bits at the position given by the remaining octet count.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // Sequence complete: $mUcs4 holds the code point, now check that it is legal.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // If the input ended while continuation octets were still expected, the last
    // sequence is truncated and the string is not valid UTF-8.
    return $mState === 0;
  }
3634
3635
  /**
3636
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3637
   * Decodes a JSON string
3638
   *
3639
   * @link http://php.net/manual/en/function.json-decode.php
3640
   *
3641
   * @param string $json    <p>
3642 24
   *                        The <i>json</i> string being decoded.
3643
   *                        </p>
3644 24
   *                        <p>
3645
   *                        This function only works with UTF-8 encoded strings.
3646 24
   *                        </p>
3647 2
   *                        <p>PHP implements a superset of
3648
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3649
   *                        only supports these values when they are nested inside an array or an object.
3650 23
   *                        </p>
3651
   * @param bool   $assoc   [optional] <p>
3652 23
   *                        When <b>TRUE</b>, returned objects will be converted into
3653
   *                        associative arrays.
3654
   *                        </p>
3655
   * @param int    $depth   [optional] <p>
3656
   *                        User specified recursion depth.
3657
   *                        </p>
3658
   * @param int    $options [optional] <p>
3659
   *                        Bitmask of JSON decode options. Currently only
3660
   *                        <b>JSON_BIGINT_AS_STRING</b>
3661
   *                        is supported (default is to cast large integers as floats)
3662 1
   *                        </p>
3663
   *
3664 1
   * @return mixed the value encoded in <i>json</i> in appropriate
3665
   * PHP type. Values true, false and
3666
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3667
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3668 1
   * <i>json</i> cannot be decoded or if the encoded
3669
   * data is deeper than the recursion limit.
3670
   */
3671
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through filter() before decoding.
    $filtered = self::filter($json);

    // $options is forwarded only when the PHP 5.4 version check passes.
    if (Bootup::is_php('5.4') === true) {
      return json_decode($filtered, $assoc, $depth, $options);
    }

    return json_decode($filtered, $assoc, $depth);
  }
3683 1
3684
  /**
3685 1
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3686
   * Returns the JSON representation of a value
3687
   *
3688
   * @link http://php.net/manual/en/function.json-encode.php
3689
   *
3690
   * @param mixed $value   <p>
3691
   *                       The <i>value</i> being encoded. Can be any type except
3692
   *                       a resource.
3693
   *                       </p>
3694 2
   *                       <p>
3695
   *                       All string data must be UTF-8 encoded.
3696 2
   *                       </p>
3697
   *                       <p>PHP implements a superset of
3698 2
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3699 2
   *                       only supports these values when they are nested inside an array or an object.
3700 2
   *                       </p>
3701
   * @param int   $options [optional] <p>
3702 2
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3703
   *                       <b>JSON_HEX_TAG</b>,
3704
   *                       <b>JSON_HEX_AMP</b>,
3705
   *                       <b>JSON_HEX_APOS</b>,
3706
   *                       <b>JSON_NUMERIC_CHECK</b>,
3707
   *                       <b>JSON_PRETTY_PRINT</b>,
3708
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3709
   *                       <b>JSON_FORCE_OBJECT</b>,
3710
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3711
   *                       constants is described on
3712 1
   *                       the JSON constants page.
3713
   *                       </p>
3714 1
   * @param int   $depth   [optional] <p>
3715
   *                       Set the maximum depth. Must be greater than zero.
3716
   *                       </p>
3717
   *
3718 1
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3719
   */
3720
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is passed through filter() before encoding.
    $filtered = self::filter($value);

    // $depth is forwarded only when the PHP 5.5 version check passes.
    if (Bootup::is_php('5.5')) {
      return json_encode($filtered, $options, $depth);
    }

    return json_encode($filtered, $options);
  }
3732 13
3733
  /**
3734
   * Makes string's first char lowercase.
3735 13
   *
3736 13
   * @param    string $str The input string
3737 13
   *
3738 13
   * @return   string The resulting string
3739 13
   */
3740 13
  public static function lcfirst($str)
  {
    // Lowercase the first character and append the rest of the string untouched.
    $head = self::strtolower(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
3744 13
3745 13
  /**
3746 13
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3747 13
   *
3748 13
   * WARNING: This is much slower than "ltrim()" !!!!
3749
   *
3750 13
   * @param    string $str   The string to be trimmed
3751 2
   * @param    string $chars Optional characters to be stripped
3752
   *
3753
   * @return   string The string with unwanted characters stripped from the left
3754 13
   */
3755 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // INF is the "no characters given" sentinel: strip whitespace in that case,
    // otherwise use the pattern fragment that rxClass() builds from $chars.
    if (INF === $chars) {
      $pattern = '\s';
    } else {
      $pattern = self::rxClass($chars);
    }

    return preg_replace("/^{$pattern}+/u", '', $str);
  }
3767 2
3768
  /**
3769 2
   * Returns the UTF-8 character with the maximum code point in the given data.
3770 1
   *
3771 1
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3772 1
   *
3773
   * @return   string The character with the highest code point than others.
3774 2
   */
3775 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is concatenated into one string first.
    $data = is_array($arg) ? implode($arg) : $arg;

    // Highest code point of the data, converted back to a character.
    $codePoints = self::codepoints($data);

    return self::chr(max($codePoints));
  }
3783
3784
  /**
3785
   * Calculates and returns the maximum number of bytes taken by any
3786 8
   * UTF-8 encoded character in the given string.
3787
   *
3788 8
   * @param    string $str The original Unicode string.
3789 8
   *
3790
   * @return int The largest byte length of any character in the string, or 0 if there are none.
3791 8
   */
3792
  public static function max_chr_width($str)
3793 8
  {
3794
    $bytes = self::chr_size_list($str);
3795 2
    if (count($bytes) > 0) {
3796
      return (int)max($bytes);
3797 2
    } else {
3798
      return 0;
3799 1
    }
3800 1
  }
3801
3802 2
  /**
   * Reports whether the "mbstring" extension is loaded and, if it is,
   * switches its internal encoding to UTF-8.
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    // side effect: all following mb_* calls default to UTF-8
    \mb_internal_encoding('UTF-8');

    return true;
  }
3817
3818
  /**
   * Returns the UTF-8 character with the lowest code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the lowest code point,<br />
   *           or an empty string if the input contains no characters.
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // min() must not be called with an empty list (warning on PHP < 8, ValueError on PHP >= 8)
    if (!is_array($codepoints) || count($codepoints) === 0) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3833
3834
  /**
   * Maps a loosely written encoding name onto its canonical spelling.
   *
   * The input is upper-cased; if its alphanumeric form (punctuation removed)
   * is a known alias, the canonical name is returned, otherwise the
   * upper-cased input itself. Results are memoized per original input.
   *
   * @param string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.
   */
  public static function normalizeEncoding($encoding)
  {
    static $cache = array();

    // empty values and names already listed in self::$iconvEncoding pass through untouched
    if (!$encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($cache[$encoding])) {
      return $cache[$encoding];
    }

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $normalized = strtoupper($encoding);

    // the alias table is keyed without dashes, dots, underscores, ...
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $normalized);

    if (isset($aliases[$aliasKey])) {
      $normalized = $aliases[$aliasKey];
    }

    $cache[$encoding] = $normalized;

    return $normalized;
  }
3886
3887
  /**
   * Replaces the characters listed in self::$utf8MSWord (keys) with
   * their mapped substitutes (values).
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // [0] => search list, [1] => replacement list; built once per request
    static $searchAndReplace = null;

    if ($searchAndReplace === null) {
      $searchAndReplace = array(
          array_keys(self::$utf8MSWord),
          array_values(self::$utf8MSWord),
      );
    }

    return str_replace($searchAndReplace[0], $searchAndReplace[1], $str);
  }
3906
3907
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable is replaced by a plain space and,
   * unless requested otherwise, every entry of self::$bidiUniCodeControlsTable
   * is removed.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // Only a strict boolean true keeps the NBSP. The cache key is derived from that
    // very decision, so a truthy non-boolean (e.g. 1) can no longer store a list that
    // still contains the NBSP under the key that is later used for true.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($whitespaces[$cacheKey])) {

      $whitespaces[$cacheKey] = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($whitespaces[$cacheKey]['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($whitespaces[$cacheKey]);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3945
3946 1
  /**
   * Format a number with grouped thousands.
   *
   * Works like the native number_format(), but also accepts separators that
   * are longer than one byte (e.g. a UTF-8 encoded non-breaking space).
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // As soon as ONE of the separators is longer than a single byte, format with
    // ASCII placeholders and swap them afterwards. strtr() with an array replaces
    // all placeholders in one pass, so a separator that itself contains "." or ","
    // is never touched by the other replacement.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3981 2
3982 2
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * Only the first (up to four) bytes of the input are inspected.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for an empty input.
   */
  public static function ord($s)
  {
    // everything "empty" except the string "0" has no code point
    if (!$s && $s !== '0') {
      return 0;
    }

    // init
    self::checkForSupport();

    // prefer the intl implementation, but fall through when it yields nothing usable
    if (self::$support['intlChar'] === true) {
      $intlCodePoint = \IntlChar::ord($s);
      if ($intlCodePoint) {
        return $intlCodePoint;
      }
    }

    // 1-based list of the first four byte values
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // 4-byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // 3-byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // 2-byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte (or a truncated sequence): the byte value itself
    return $lead;
  }
4023
4024 1
  /**
   * Parses the string into variables.
   *
   * WARNING: Unlike the one-argument form of parse_str(), the variables are
   *    never placed in the local scope; they are always written to $result.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string; it is passed through self::filter() before parsing.
   *                        </p>
   * @param array  $result  <p>
   *                        Receives the parsed variables as array elements.
   *                        </p>
   *
   * @return void
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    \mb_parse_str(self::filter($str), $result);
  }
4051
4052 36
  /**
   * Checks whether PCRE accepts the "u" pattern modifier, by matching
   * an empty pattern that uses it.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // the warning raised for an unknown modifier is suppressed on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
4062 36
4063
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits, hexadecimal
   * digits, otherwise as a character whose code point is taken.
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters between both boundaries, or an empty array
   *                 if a boundary is empty or resolves to 0.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start and end with the very same rules, start first
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
4109
4110 23
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Besides the raw byte order marks, the UTF-8 and UTF-16 marks are also
   * recognized in their "mojibake" form, i.e. read as WINDOWS-1252 and
   * stored as UTF-8 again. At most one leading BOM is removed.
   *
   * @param string $str
   *
   * @return string
   */
  public static function removeBOM($str = '')
  {
    // INFO: https://en.wikipedia.org/wiki/Byte_order_mark
    //
    // All marks are written as explicit byte escapes, so they do not depend on the
    // encoding of this source file. The order matters: the UTF-32 (LE) BOM begins
    // with the UTF-16 (LE) BOM, hence the UTF-32 marks are tested first.
    static $boms = array(
        "\xef\xbb\xbf",             // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf", // UTF-8 BOM as "WINDOWS-1252" (6 bytes)
        "\x00\x00\xfe\xff",         // UTF-32 (BE) BOM
        "\xff\xfe\x00\x00",         // UTF-32 (LE) BOM
        "\xfe\xff",                 // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf",         // UTF-16 (BE) BOM as "WINDOWS-1252" (4 bytes)
        "\xff\xfe",                 // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe",         // UTF-16 (LE) BOM as "WINDOWS-1252" (4 bytes)
    );

    foreach ($boms as $bom) {
      if (0 === strpos($str, $bom)) {
        // the cast covers old PHP versions, where substr() returns false for "nothing left"
        return (string)substr($str, strlen($bom));
      }
    }

    return $str;
  }
4141
4142 15
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of a search string is
   * collapsed into a single occurrence.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        // Re-insert the captured group instead of the raw $item: used as a replacement
        // string, an $item containing "$0" or "\1" would be expanded as a back-reference.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4164
4165
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the url encoded form ("%0b", "%1F", ...) of these characters.
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left: removing one sequence can form a new one
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4198
4199 25
  /**
   * Replace the Unicode replacement character (U+FFFD, shown as a
   * diamond with a question mark) with the given string.
   *
   * @param string $str
   * @param string $unknown The substitute for every replacement character.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // U+FFFD as explicit UTF-8 bytes, independent of the encoding of this source file
    return str_replace("\xEF\xBF\xBD", $unknown, $str);
  }
4221 24
4222 24
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * WARNING: This is much slower then "rtrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    // an empty string has nothing to trim
    if ($str === '') {
      return '';
    }

    // INF is the "no characters given" marker: strip "\s" in that case,
    // otherwise build a regex character class from the given characters
    $pattern = INF === $chars ? '\s' : self::rxClass($chars);

    // "\z" anchors at the very end of the subject; "$" would also match in front
    // of a final newline and strip characters the newline actually protects
    $trimmed = preg_replace('/' . $pattern . '+\z/u', '', $str);

    // preg_replace() returns null when PCRE fails (e.g. invalid UTF-8 in the subject);
    // hand back the input unchanged instead of silently returning null
    return $trimmed === null ? $str : $trimmed;
  }
4244
4245
  /**
   * Builds a regular expression fragment that matches any one of the
   * characters of $s.
   *
   * Short characters are collected in one bracket expression; elements that
   * str_split() returns with three or more bytes but a length other than 1
   * are added as alternatives in a non-capturing group.
   *
   * @param string $s     The characters to match.
   * @param string $class Initial content of the bracket expression.
   *
   * @return string
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    $bracketContent = $class;
    $alternatives = array();

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a dash goes first, where it cannot be read as a range operator
        $bracketContent = '-' . $bracketContent;
      } elseif (!isset($char[2])) {
        // at most two bytes: escape regex meta characters
        $bracketContent .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $bracketContent .= $char;
      } else {
        $alternatives[] = $char;
      }
    }

    $bracketExpression = '[' . $bracketContent . ']';

    if (count($alternatives) === 0) {
      $return = $bracketExpression;
    } else {
      $return = '(?:' . implode('|', array_merge(array($bracketExpression), $alternatives)) . ')';
    }

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
4290
4291 2
  /**
   * Prints every entry of self::$support, each followed by a newline
   * and a "<br>" tag, e.g. for debugging.
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportEntry) {
      echo $supportEntry, "\n<br>";
    }
  }
4300 3
4301 1
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   * @param    bool   $keepAsciiChars Keep ASCII chars.
   *
   * @return   string The HTML numbered entity, the unchanged character if it is
   *                  ASCII and $keepAsciiChars is true, or an empty string for empty input.
   */
  public static function single_chr_html_encode($chr, $keepAsciiChars = false)
  {
    // "0" is falsy in PHP but a perfectly valid character (same guard as in ord())
    if (!$chr && $chr !== '0') {
      return '';
    }

    if ($keepAsciiChars === true && self::isAscii($chr) === true) {
      return $chr;
    }

    return '&#' . self::ord($chr) . ';';
  }
4323 3
4324
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *                              (only applied when PCRE with UTF-8 support is used).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // "." with the "u" and "s" modifiers: one match per character, newlines included
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: walk the bytes and group them by the length the lead byte announces;
      // a lead byte whose continuation bytes do not fit is skipped
      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        if (($str[$pos] & "\x80") === "\x00") {
          // 0xxxxxxx: single byte
          $chars[] = $str[$pos];
        } elseif ((($str[$pos] & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // 110xxxxx: lead byte of a two-byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $str[$pos] . $str[$pos + 1];

            $pos++;
          }
        } elseif ((($str[$pos] & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // 1110xxxx: lead byte of a three-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = $str[$pos] . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
        } elseif ((($str[$pos] & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // 11110xxx: lead byte of a four-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = $str[$pos] . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    // glue the single characters together into chunks of $length characters
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4402 1
4403
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run from the most specific to the most generic one; the first hit wins.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    // 1.) binary strings (010001001...) like UTF-16 / UTF-32
    if (self::is_binary($str)) {
      if (self::is_utf16($str) === 1) {
        return 'UTF-16LE';
      }

      if (self::is_utf16($str) === 2) {
        return 'UTF-16BE';
      }

      if (self::is_utf32($str) === 1) {
        return 'UTF-32LE';
      }

      if (self::is_utf32($str) === 2) {
        return 'UTF-32BE';
      }
    }

    // 2.) plain ASCII
    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    // 3.) valid UTF-8
    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    // 4.) ask "\mb_detect_encoding()" (strict mode)
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"
    self::checkForSupport();

    $detected = \mb_detect_encoding(
        $str,
        array(
            'windows-1251',
            'ISO-8859-1',
            'ASCII',
            'UTF-8',
        ),
        true
    );
    if ($detected) {
      return $detected;
    }

    // 5.) try "iconv()": an encoding fits if converting the string to itself leaves it unchanged
    $expectedHash = md5($str);
    foreach (self::$iconvEncoding as $candidate) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @iconv($candidate, $candidate, $str);

      if (md5($roundTrip) === $expectedHash) {
        return $candidate;
      }
    }

    return false;
  }
4480
4481
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search string never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement(s); always inserted literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed a string or an array of replacements.
   * @since 5.0
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // turn every needle into a literal, case-insensitive, UTF-8 aware pattern
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // a pattern that can never match: an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() interprets "$n" and "\n" in the replacement as back-references,
    // str_ireplace() does not -> escape them, so the replacement is inserted literally
    $escapeMap = array(
        '\\' => '\\\\',
        '$'  => '\\$',
    );
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = strtr((string)$value, $escapeMap);
      }
    } else {
      $replacement = strtr((string)$replace, $escapeMap);
    }

    $subject = preg_replace($patterns, $replacement, $subject, -1, $replacedCount);
    $count = $replacedCount;

    return $subject;
  }
4525
4526
  /**
   * Limit the number of characters in a string without cutting the last word:
   * the string is shortened to the last space within the limit and the
   * add-on string is appended.
   *
   * @param  string $str
   * @param  int    $length   The maximum number of characters to keep.
   * @param  string $strAddOn Appended whenever the string was shortened.
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit hits a space: cut right in front of it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // drop the last, probably incomplete, word
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    // nothing in front of the last word: fall back to a hard cut
    if ($withoutLastWord === '') {
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4566
4567 17
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if $pad_length is not an integer, not
   * positive, smaller than the length of the input, or if $pad_string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $input_length) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad string cannot fill anything and would cause a division by zero below
    if ($ps_length < 1) {
      return $input;
    }

    $diff = $pad_length - $input_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the bigger one;
        // the cast has to cover the whole division, otherwise a float length is passed on
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $pre = self::substr($pre, 0, (int)($diff / 2));
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $post = self::substr($post, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $input . $post;
  }
4612
4613
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is handed to PHP's str_repeat().
   *
   * @param string $input      The string to be repeated.
   * @param int    $multiplier Number of times the (filtered) input string is repeated.
   *
   * @return string The repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return str_repeat(self::filter($input), $multiplier);
  }
4637
4638
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: this is only a thin wrapper around PHP's "str_replace()"; all arguments are forwarded unchanged.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  The value being searched for (the needle); an array designates multiple needles.
   * @param mixed $replace The replacement value; an array designates multiple replacements.
   * @param mixed $subject The string or array being searched and replaced on (the haystack).
   * @param int   $count   [optional] If passed, receives the number of replacements performed.
   *
   * @return mixed A string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4672 7
4673 6
  /**
   * Shuffles all the characters in the string.
   *
   * The string is broken up with self::split(), the resulting array is
   * shuffled with PHP's shuffle() and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4688 3
4689
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique If true, repeated characters are dropped before sorting.
   * @param    bool   $desc   If true, characters are sorted in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codepoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicate values
      $codepoints = array_flip(array_flip($codepoints));
    }

    if (!$desc) {
      asort($codepoints);
    } else {
      arsort($codepoints);
    }

    return self::string($codepoints);
  }
4714
4715 4
  /**
   * Convert a string to an array of chunks, each holding $len grapheme clusters.
   *
   * @param string $str The input string.
   * @param int    $len Number of grapheme clusters per chunk; values below 1 are
   *                    delegated to PHP's native str_split().
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    // let the native function deal with (and report) an invalid chunk length
    if ($len < 1) {
      return str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $bytePos = 0;
      $byteLength = strlen($str);
      // grapheme_extract() writes the start of the next cluster back into $bytePos
      while ($bytePos < $byteLength) {
        $graphemes[] = \grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $bytePos, $bytePos);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $graphemes = $matches[0];
    }

    if ($len === 1) {
      return $graphemes;
    }

    // glue every $len consecutive clusters together
    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        // first cluster of a new chunk
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
4763 1
4764
  /**
   * Get a binary representation of a string.
   *
   * Every byte of the input is run through self::ord() and rendered as a
   * zero-padded, 8 digit binary number; the pieces are concatenated.
   *
   * @param   string $str The input character.
   *
   * @return  string Empty string for empty input.
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $bits = '';
    foreach (str_split($str) as $byte) {
      $bits .= vsprintf('%08b', (array)self::ord($byte));
    }

    return $bits;
  }
4790 8
4791
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   *
   * The string is cleaned with self::clean(), split into single characters and
   * every non-ASCII character is replaced via lookup tables that are loaded on
   * demand from "data/xNN.php" (NN = code point >> 8, in hex). Characters without
   * a table entry are replaced by $unknown.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    // lookup tables, cached across calls
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // reset the code point, otherwise the value of the previous character
      // would be reused if none of the branches below matches
      $ord = null;

      $ordC1 = ord($c[1]);

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = ord($c[2]);

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = ord($c[3]);

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = ord($c[4]);

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = ord($c[5]);

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 0xFE / 0xFF never start a character
      if ($ordC0 >= 254 && $ordC0 <= 255) {
        $c = $unknown;
        continue;
      }

      // no branch above produced a code point (e.g. stray continuation byte)
      if (!isset($ord)) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the lookup table ("bank"), the lower 8 bits the entry in it
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): presumably the bank file fills $UTF8_TO_ASCII[$bank] - confirm against data/*.php
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
4901
4902
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split on runs of word characters (the character class is built
   * by self::rxClass() from $charlist plus "\pL"); the captured delimiters, found
   * at the odd indexes of the split result, are the words.
   *
   * @param string $str    The input string.
   * @param int    $format <strong>0</strong> => return a number of words<br />
   *                       <strong>1</strong> => return an array of words
   *                       <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist Additional characters handed to self::rxClass().
   *
   * @return array|int|float The number of words in the string, or the list of words
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $len = count($strParts);

    if ($format === 1) {
      // plain list of words
      $words = array();
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      self::checkForSupport();

      // words keyed by their character offset in the input
      $words = array();
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $words[$offset] = $strParts[$i];
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

      return $words;
    }

    // every word sits between two non-word parts
    return ($len - 1) / 2;
  }
4946
4947 7
  /**
   * Case-insensitive string comparison.
   *
   * Both strings are case-folded with self::strtocasefold() and then compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4959 7
4960
  /**
   * String comparison.
   *
   * Identical strings short-circuit to 0; otherwise both strings are
   * normalized to NFD and compared with PHP's strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4977
4978
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      The string to examine.
   * @param string $charList Characters that end the initial segment.
   * @param int    $offset   Start position; together with $length it is applied via self::substr().
   * @param int    $length   Length of the examined part.
   *
   * @return int|null Length (in characters) of the initial segment, or null if $charList is empty.
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;

    if ($charList === '') {
      return null;
    }

    // only cut the string when the caller actually asked for a sub-range
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    } else {
      $str = (string)$str;
    }

    // lazily capture everything in front of the first character from the list
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      return self::strlen($matches[1]);
    }

    return self::strlen($str);
  }
5007
5008
  /**
   * Makes a UTF-8 string from code points.
   *
   * Each entry is converted with the class's chr() method and the results are concatenated.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $chars = array_map(array('\\voku\\helper\\UTF8', 'chr'), $array);

    return implode('', $chars);
  }
5027
5028
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * The first three bytes of the string are handed to self::is_bom().
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $leadingBytes = substr($str, 0, 3);

    return self::is_bom($leadingBytes);
  }
5039
5040
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The input is cleaned with self::clean() before PHP's strip_tags() is applied.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            The input string.
   * @param string $allowable_tags [optional] Tags which should not be stripped.
   *                               HTML comments and PHP tags are also stripped; this
   *                               can not be changed with allowable_tags.
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 first
    return strip_tags(self::clean($str), $allowable_tags);
  }
5066
5067
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  The string from which to get the position of the first occurrence of needle
   * @param string  $needle    The string to find in haystack
   * @param int     $offset    [optional] The position in haystack to start searching; null is treated as 0
   * @param string  $encoding  Encoding handed to mb_stripos(); a boolean is replaced by 'UTF-8'
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string,
   *                   or false if needle is not found or if haystack / needle is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // the default (null) must not reach mb_stripos(), which expects an integer offset
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5114 10
5115
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * The search is delegated to mb_stristr() with the encoding fixed to UTF-8.
   *
   * @param string $str           The haystack.
   * @param string $needle        The string to look for; an empty needle yields false.
   * @param bool   $before_needle Passed through to mb_stristr().
   *
   * @return false|string
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    return \mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
5135 1
5136
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Set the charset for e.g. "\mb_" function
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied for UTF-8)
   *
   * @return int the number of characters in string str having character encoding
   *             encoding. A multi-byte character is counted as 1.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    $encoding = self::normalizeEncoding($encoding);

    // for these encodings the byte count is returned directly
    // (loose comparison on purpose, it mirrors a switch statement)
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    self::checkForSupport();

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5179
5180
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are case-folded with self::strtocasefold() and then compared with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>< 0</strong> if str1 is less than str2<br />
   *             <strong>> 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5194
5195
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Identical strings short-circuit to 0; otherwise both strings are passed
   * through self::strtonatfold() and compared with PHP's strnatcmp().
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 The first string.
   * @param string $str2 The second string.
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5218
5219
  /**
   * Case-insensitive string comparison of the first n characters
   *
   * Both strings are case-folded with self::strtocasefold() and then compared with self::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 The first string.
   * @param string $str2 The second string.
   * @param int    $len  The length of strings to be used in the comparison.
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5244
5245
  /**
   * String comparison of the first n characters
   *
   * Both strings are cut to their first $len characters with self::substr()
   * and the pieces are compared with self::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 The first string.
   * @param string $str2 The second string.
   * @param int    $len  Number of characters to use in the comparison.
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncmp($str1, $str2, $len)
  {
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5271
5272
  /**
   * Search a string for any of a set of characters
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  The string where char_list is looked for.
   * @param string $char_list The characters to look for; this parameter is case sensitive.
   *
   * @return string|false a string starting from the character found, or false if it is
   *                      not found or if one of the arguments is empty.
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m)) {
      return false;
    }

    // byte position of the first matching character
    $bytePos = strpos($haystack, $m[0]);

    return substr($haystack, $bytePos);
  }
5303 9
5304
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  The string being checked.
   * @param string|int $needle    The string to find in haystack; a non-negative integer is
   *                              converted to a character with self::chr().
   * @param int        $offset    [optional] The search offset. If it is not specified, 0 is used.
   * @param string     $encoding  Encoding handed to mb_strpos(); a boolean is replaced by 'UTF-8'
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support integer $needle, so convert it into a character;
    // this has to happen before the string cast, otherwise the check can never be true
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): the check is for "iconv" but the call is a grapheme (intl) function - confirm which one is intended
    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // last resort: byte-wise search, then translate the byte position into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5380
5381
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around mb_strrchr().
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle
   * @param string $needle   The string to find in haystack
   * @param bool   $part     [optional] If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name to use.
   *
   * @return string|false the portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = \mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5415
5416
  /**
   * Reverses characters order in the string.
   *
   * The string is broken up with self::split(), the pieces are reversed and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5427
5428 20
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around mb_strrichr().
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle
   * @param string $needle   The string to find in haystack
   * @param bool   $part     [optional] If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name to use.
   *
   * @return string|false the portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $portion = \mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5462 16
5463
  /**
   * Find the position of the last occurrence of a string, ignoring case.
   *
   * Both arguments are lower-cased with self::strtolower() and the search
   * itself is delegated to self::strrpos().
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Offset handed through to self::strrpos()
   *
   * @return   int|false The position of the last occurrence, or false if needle is not found
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $haystackLower = self::strtolower($haystack);
    $needleLower = self::strtolower($needle);

    return self::strrpos($haystackLower, $needleLower, $offset);
  }
5476
5477
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  The string being searched.
   * @param string|int $needle    The string to find in haystack, or a non-negative
   *                              integer that is converted with self::chr() first.
   * @param int        $offset    [optional] Search offset; null is treated as 0. In the
   *                              pure-PHP fallback a positive value skips that many leading
   *                              characters and a negative value cuts characters off the end.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle in
   *                   haystack, or false if needle is not found (or either
   *                   string is empty).
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // a non-negative integer needle is turned into a character first
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    // nothing can be found in / with an empty string
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): the "iconv" flag guards a grapheme_*() call (intl or its
    // polyfill) - confirm that this is the intended support check.
    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: narrow the haystack according to the offset, then search byte-wise

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // translate the byte position into a character position
    $left = substr($haystack, 0, $bytePos);
    $skipped = $offset > 0 ? $offset : 0;

    return $skipped + self::strlen($left);
  }
5552
5553
  /**
   * Length of the initial segment of a string that consists entirely of
   * characters contained in a given mask.
   *
   * @param string $str    The string to examine
   * @param string $mask   The allowed characters (turned into a regex class by self::rxClass())
   * @param int    $offset Start position; when non-zero the string is cut with self::substr() first
   * @param int    $length Maximum length to examine; 2147483647 means "no limit"
   *
   * @return int|null
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $needsSlice = $offset || 2147483647 !== $length;

    if ($needsSlice) {
      $str = self::substr($str, $offset, $length);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (preg_match($pattern, $str, $matches)) {
      return self::strlen($matches[0]);
    }

    return 0;
  }
5572
5573 1
  /**
   * Returns the part of haystack from the first occurrence of needle to the end of haystack.
   *
   * Thin wrapper around \grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      The input string. Must be valid UTF-8.
   * @param string $needle        The string to look for. Must be valid UTF-8.
   * @param bool   $before_needle [optional] If <b>TRUE</b>, the part of the haystack before the
   *                              first occurrence of the needle (excluding the needle) is returned.
   *
   * @return string|false The portion of the string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5597
5598
  /**
   * Unicode transformation for case-less matching.
   *
   * Applies the replacements from self::$commonCaseFold, optionally the
   * 'caseFolding_full' data set, then cleans and lower-cases the result.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str  The input string
   * @param bool   $full Whether the 'caseFolding_full' replacements are applied as well
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup tables are built once per request and cached in static locals
    static $commonSearch = null;
    static $commonReplace = null;
    static $fullTable = null;

    if ($commonSearch === null) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonSearch, $commonReplace, $str);

    if ($full) {
      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search strings, index 1 the replacements
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    return self::strtolower(self::clean($folded));
  }
5635
5636
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      The string being lowercased.
   * @param string $encoding Character encoding passed to \mb_strtolower(); defaults to 'UTF-8'.
   *
   * @return string str with all alphabetic characters converted to lowercase
   *                (an empty string for empty input).
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to convert
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $lower = \mb_strtolower($str, $encoding);

    return $lower;
  }
5662
5663
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of characters from the
   * Unicode category "Mn" is removed afterwards.
   *
   * @param string $s The input string
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5674 1
5675 1
  /**
   * Make a string uppercase.
   *
   * Uses \mb_strtoupper() when mbstring support is flagged; otherwise the
   * string is cleaned and converted via the table from self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      The string being uppercased.
   * @param string $encoding Character encoding passed to \mb_strtoupper(); defaults to 'UTF-8'.
   *
   * @return string str with all alphabetic characters converted to uppercase
   *                (an empty string for empty input).
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // fallback lookup table, split into search / replace lists and cached
    static $searchChars = null;
    static $replaceChars = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, $encoding);
    }

    // fallback

    if ($searchChars === null) {
      $table = self::case_table();
      $searchChars = array_keys($table);
      $replaceChars = array_values($table);
    }

    return str_replace($searchChars, $replaceChars, self::clean($str));
  }
5718 6
5719 6
  /**
   * Translate characters or replace sub-strings.
   *
   * With three arguments, $from and $to are split with self::str_split(), the
   * longer list is truncated to the length of the shorter one and the result is
   * used as a replacement map. With two arguments, $from is handed to \strtr()
   * unchanged.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  The string being translated.
   * @param string|array $from The characters to replace, or (two-argument form)
   *                           the replacement map.
   * @param string|array $to   The replacement characters; INF marks "not given".
   *
   * @return string A copy of str with all occurrences of each character in
   *                from translated to the corresponding character in to.
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      // only as many pairs as the shorter list provides
      $pairCount = min(count($fromChars), count($toChars));

      $from = array_combine(
          array_slice($fromChars, 0, $pairCount),
          array_slice($toChars, 0, $pairCount)
      );
    }

    return strtr($str, $from);
  }
5760 1
5761
  /**
   * Return the width of a string, as reported by \mb_strwidth() for UTF-8.
   *
   * @param string $s The input string
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5775 6
5776
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       The string being checked.
   * @param int     $start     The first position used in str.
   * @param int     $length    [optional] The maximum length of the returned string;
   *                           null means "up to self::strlen($str)".
   * @param string  $encoding  Character encoding for \mb_substr(); a boolean (old
   *                           call style) is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string The portion of str specified by the start and length
   *                parameters (an empty string for empty input).
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    $length = $length === null ? (int)self::strlen($str) : (int)$length;

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if (is_bool($encoding)) {
        $encoding = 'UTF-8';
      }

      return \mb_substr($str, $start, $length, $encoding);
    }

    // NOTE(review): the "iconv" flag guards a grapheme_*() call (intl or its
    // polyfill) - confirm that this is the intended support check.
    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback: split into an array of characters, take the requested
    // slice and join it back into a string
    $chars = self::split($str);

    return implode('', array_slice($chars, $start, $length));
  }
5842
5843
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The main string is cut to ($offset, $length) first; the secondary string is
   * then cut to the length of that slice before both are compared.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison (passed to self::substr()).
   * @param int     $length             The length of the comparison (passed to self::substr()).
   * @param boolean $case_insensitivity If TRUE, self::strcasecmp() is used instead of self::strcmp().
   *
   * @return int
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5863 1
5864
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack The string to search in.
   * @param string   $needle   The substring to search for.
   * @param int      $offset   [optional] The offset (in characters) where to start counting.
   * @param int|null $length   [optional] The maximum length (in characters) after the
   *                           specified offset to search for the substring; null
   *                           (default) searches up to the end of the haystack.
   *
   * @return int The number of occurrences; 0 if haystack or needle is empty.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return 0;
    }

    // Compare against null explicitly: an explicit length of 0 must restrict
    // the search, and an omitted length must NOT be turned into 0 (which
    // would cut the haystack down to an empty string whenever only an offset
    // is given).
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      if ($length !== null) {
        $length = (int)$length;
      }

      // a null length makes self::substr() return everything after $offset
      $haystack = self::substr($haystack, $offset, $length);
    }

    // init
    self::checkForSupport();

    // pass the encoding explicitly instead of relying on the mbstring
    // internal encoding, like the other mb_* calls in this class
    return \mb_substr_count($haystack, $needle, 'UTF-8');
  }
5908
5909
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * When $str is an array, the function is applied to every element; scalar
   * $replacement / $start / $length values are repeated for each element and
   * array values are used position by position.
   *
   * @param string|array   $str         The input string, or an array of input strings.
   * @param string|array   $replacement The replacement string(s). For a string input only the
   *                                    first array element is used ('' for an empty array).
   * @param int|array      $start       The character position(s) where the replacement starts.
   * @param null|int|array $length      The number of characters to replace. For a string input,
   *                                    null replaces everything up to the end of the string.
   *
   * @return array|string The result string, or an array of result strings for array input.
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement: one entry per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: non-integer entries fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      // NOTE(review): an omitted length becomes 0 for every element here (the
      // replacement is inserted), while the string branch below replaces up to
      // the end of the string - confirm whether this difference is intended.
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, element by element
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // a single input string can only take a single replacement
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both strings into arrays of characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      self::checkForSupport();

      // count characters as UTF-8 explicitly; the mbstring internal encoding
      // is configuration dependent and may yield a length that is too short
      $length = \mb_strlen((string)$str, 'UTF-8');
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // canonical argument order: separator first, then the pieces
    return implode('', $smatches[0]);
  }
5986 6
5987 6
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change the
   * character, its lower-cased form is used instead.
   *
   * @param string $str      The input string
   * @param string $encoding Encoding passed to the case conversion helpers; defaults to 'UTF-8'
   *
   * @return string each character's case swapped
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $swapChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // unchanged by upper-casing => use the lower-case form
      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapChar, self::clean($str));
  }
6021
6022 20
  /**
   * Alias of "UTF8::to_ascii()".
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Substitute character, passed through unchanged
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
6034
6035
  /**
   * Alias of "UTF8::to_latin1()".
   *
   * @param string|array $str The input, passed through unchanged
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
6046
6047 1
  /**
   * Alias of "UTF8::to_utf8()".
   *
   * @param string|array $str The input, passed through unchanged
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
6058
6059
  /**
   * Convert a string to ASCII.
   *
   * Strings without bytes >= 0x80 are returned as they are (after cleaning).
   * Otherwise the string is normalized to NFKC and every multi-byte character
   * is transliterated: first via iconv(), then via the 'translit_extra' data
   * set, then via the first byte of its NFD form, and finally via
   * self::str_transliterate().
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Substitute used when the NFD fallback finds no ASCII
   *                          base character; also passed to self::str_transliterate()
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // no byte >= 0x80: nothing to transliterate
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = \Normalizer::normalize($s, \Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // The "s" modifier is required: without it "." does not match "\n" and
    // every newline would silently disappear from the result.
    preg_match_all('/./us', $s, $matches);
    $chars = $matches[0];

    /** @noinspection AlterInForeachInspection */
    foreach ($chars as &$c) {

      // single-byte characters are kept as they are
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if (is_string($t)) {
          if (!isset($t[0])) {
            // nothing left after //IGNORE: treat as "unknown"
            $t = '?';
          } elseif (isset($t[1])) {
            // strip the accent marks some iconv implementations prepend
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // iconv() failed: do not drop the character, let the fallbacks below
      // have a go at it instead
      if ($t === false) {
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = \Normalizer::normalize($c, \Normalizer::NFD);

          // keep the leading byte of the decomposition if it is ASCII
          if ($t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
6138
6139 7
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @param   string|array $str The input, passed through unchanged
   *
   * @return  array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6150 7
6151
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @param string|array $str The input, passed through unchanged
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6162
6163
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte-level pass, "\uXXXX" escape sequences and numeric HTML
   * entities with two to four digits ("&#NNN;") are decoded as well.
   *
   * @param string|array $str Any string or array (arrays are converted element by element).
   *
   * @return string|array The same string (or array), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    // Encodes one byte >= 0x80 as the two-byte UTF-8 sequence of the code point
    // with the same number (i.e. reads the byte as ISO-8859-1). The integer
    // shift replaces "ord() / 64", which handed a fractional float to chr().
    $latin1ToUtf8 = function ($byte) {
      return (chr(ord($byte) >> 6) | "\xc0") . (($byte & "\x3f") | "\x80");
    };

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // number of continuation bytes a UTF-8 sequence with this lead byte needs
        if ($c1 <= "\xdf") {
          $tail = 1; // looks like 2 bytes UTF8
        } elseif ($c1 <= "\xef") {
          $tail = 2; // looks like 3 bytes UTF8
        } elseif ($c1 <= "\xf7") {
          $tail = 3; // looks like 4 bytes UTF8
        } else {
          $tail = 0; // doesn't look like UTF8, but should be converted
        }

        // the sequence only counts as UTF-8 if all of its continuation bytes
        // exist and lie in the range 0x80-0xbf
        $seq = $c1;
        $isUtf8 = $tail > 0 && $i + $tail < $max;
        for ($j = 1; $isUtf8 && $j <= $tail; $j++) {
          $cn = $str[$i + $j];
          $isUtf8 = $cn >= "\x80" && $cn <= "\xbf";
          $seq .= $cn;
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= $seq;
          $i += $tail;
        } else { // not valid UTF8 - convert it
          $buf .= $latin1ToUtf8($c1);
        }

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= $latin1ToUtf8($c1);
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6290
6291
  /**
   * Converts a string, or every element of a (possibly nested) array, via "UTF8::utf8_decode()".
   *
   * Array keys and their order are preserved; an empty value results in an empty string.
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  protected static function to_win1252($str)
  {
    // scalar case: cast, short-circuit on empty input, otherwise decode
    if (!is_array($str)) {
      $string = (string)$str;

      return isset($string[0]) ? self::utf8_decode($string) : '';
    }

    // array case: convert each element recursively
    $converted = array();
    foreach ($str as $key => $value) {
      $converted[$key] = self::to_win1252($value);
    }

    return $converted;
  }
6318
6319
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()". The native function could only be used for
   * pure 7-bit (ASCII) strings / chars, and checking for that costs more time than it would save.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted (or null / false / empty),
   *                         Unicode separators (\pZ) and "other" characters (\pC) are stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is a valid character to strip, so it must not be treated like "no characters given"
    // (a plain "!$chars" check would do exactly that)
    $noCharsGiven = $chars === INF || (!$chars && $chars !== '0' && $chars !== 0);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($noCharsGiven) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6347
6348
  /**
   * Upper-cases the first character of a string and leaves the rest untouched.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    // first character, upper-cased via the UTF-8 aware helpers of this class
    $head = self::strtoupper(self::substr($str, 0, 1));

    // everything after the first character is appended unchanged
    return $head . self::substr($str, 1);
  }
6359
6360
  /**
   * Alias for "UTF8::ucfirst()".
   *
   * @param string $str The input string
   *
   * @return string The result of "UTF8::ucfirst()" for the given string
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
6371
6372
  /**
   * Upper-cases the first character of every space-separated word in the string.
   *
   * Words contained in $exceptions (strict, case-sensitive comparison) are left as they are.
   * The first character of the complete result is always upper-cased, even when the first
   * word is listed as an exception.
   *
   * @param  string $str        The input string
   * @param  array  $exceptions Words that must not be changed
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // explicit length check instead of "!$str", so that the string "0" is not mistaken for empty input
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;

    $newwords = array();
    foreach (explode(' ', $str) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
6413
6414
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // url-decode once, then rewrite the non-standard "%uXXXX" escapes as hexadecimal html-entities
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', urldecode($str));

    // ENT_HTML5 is only added when "Bootup::is_php('5.4')" reports support for it
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags |= ENT_HTML5;
    }

    // pipeline: to UTF-8 -> decode html-entities -> raw-url-decode again -> repair broken UTF-8
    $utf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($utf8, $flags);
    $fixed = self::fix_simple_utf8(rawurldecode($withoutEntities));

    return (string)$fixed;
  }
6454 6
6455
  /**
6456 6
   * Returns an array that maps "urlencoded" win1252 sequences ("%20" ... "%FF") to characters.
6457 1
   *
6458 1
   * @return array
6459 1
   */
6460
  public static function urldecode_fix_win1252_chars()
6461 6
  {
6462
    static $array = array(
6463
        '%20' => ' ',
6464
        '%21' => '!',
6465
        '%22' => '"',
6466
        '%23' => '#',
6467
        '%24' => '$',
6468
        '%25' => '%',
6469
        '%26' => '&',
6470
        '%27' => "'",
6471 6
        '%28' => '(',
6472
        '%29' => ')',
6473 6
        '%2A' => '*',
6474
        '%2B' => '+',
6475 6
        '%2C' => ',',
6476 6
        '%2D' => '-',
6477
        '%2E' => '.',
6478
        '%2F' => '/',
6479 5
        '%30' => '0',
6480 5
        '%31' => '1',
6481
        '%32' => '2',
6482 5
        '%33' => '3',
6483 1
        '%34' => '4',
6484 1
        '%35' => '5',
6485 1
        '%36' => '6',
6486
        '%37' => '7',
6487 5
        '%38' => '8',
6488
        '%39' => '9',
6489
        '%3A' => ':',
6490
        '%3B' => ';',
6491
        '%3C' => '<',
6492
        '%3D' => '=',
6493
        '%3E' => '>',
6494
        '%3F' => '?',
6495
        '%40' => '@',
6496
        '%41' => 'A',
6497
        '%42' => 'B',
6498
        '%43' => 'C',
6499
        '%44' => 'D',
6500
        '%45' => 'E',
6501
        '%46' => 'F',
6502
        '%47' => 'G',
6503
        '%48' => 'H',
6504
        '%49' => 'I',
6505
        '%4A' => 'J',
6506
        '%4B' => 'K',
6507
        '%4C' => 'L',
6508
        '%4D' => 'M',
6509
        '%4E' => 'N',
6510
        '%4F' => 'O',
6511
        '%50' => 'P',
6512
        '%51' => 'Q',
6513
        '%52' => 'R',
6514
        '%53' => 'S',
6515
        '%54' => 'T',
6516
        '%55' => 'U',
6517
        '%56' => 'V',
6518
        '%57' => 'W',
6519 1
        '%58' => 'X',
6520
        '%59' => 'Y',
6521 1
        '%5A' => 'Z',
6522
        '%5B' => '[',
6523
        '%5C' => '\\',
6524
        '%5D' => ']',
6525
        '%5E' => '^',
6526
        '%5F' => '_',
6527
        '%60' => '`',
6528
        '%61' => 'a',
6529
        '%62' => 'b',
6530
        '%63' => 'c',
6531
        '%64' => 'd',
6532
        '%65' => 'e',
6533 1
        '%66' => 'f',
6534
        '%67' => 'g',
6535 1
        '%68' => 'h',
6536
        '%69' => 'i',
6537
        '%6A' => 'j',
6538
        '%6B' => 'k',
6539 1
        '%6C' => 'l',
6540
        '%6D' => 'm',
6541 1
        '%6E' => 'n',
6542
        '%6F' => 'o',
6543
        '%70' => 'p',
6544 1
        '%71' => 'q',
6545 1
        '%72' => 'r',
6546 1
        '%73' => 's',
6547 1
        '%74' => 't',
6548 1
        '%75' => 'u',
6549
        '%76' => 'v',
6550
        '%77' => 'w',
6551 1
        '%78' => 'x',
6552
        '%79' => 'y',
6553
        '%7A' => 'z',
6554
        '%7B' => '{',
6555
        '%7C' => '|',
6556
        '%7D' => '}',
6557
        '%7E' => '~',
6558
        '%7F' => '',
6559
        '%80' => '`',
6560
        '%81' => '',
6561
        '%82' => '‚',
6562
        '%83' => 'ƒ',
6563
        '%84' => '„',
6564 4
        '%85' => '…',
6565
        '%86' => '†',
6566 4
        '%87' => '‡',
6567
        '%88' => 'ˆ',
6568
        '%89' => '‰',
6569
        '%8A' => 'Š',
6570 4
        '%8B' => '‹',
6571 4
        '%8C' => 'Œ',
6572 4
        '%8D' => '',
6573
        '%8E' => 'Ž',
6574 4
        '%8F' => '',
6575 4
        '%90' => '',
6576 4
        '%91' => '‘',
6577 4
        '%92' => '’',
6578
        '%93' => '“',
6579 4
        '%94' => '”',
6580
        '%95' => '•',
6581
        '%96' => '–',
6582
        '%97' => '—',
6583
        '%98' => '˜',
6584 4
        '%99' => '™',
6585
        '%9A' => 'š',
6586 4
        '%9B' => '›',
6587
        '%9C' => 'œ',
6588
        '%9D' => '',
6589
        '%9E' => 'ž',
6590
        '%9F' => 'Ÿ',
6591 4
        '%A0' => '',
6592 4
        '%A1' => '¡',
6593
        '%A2' => '¢',
6594 4
        '%A3' => '£',
6595 4
        '%A4' => '¤',
6596 4
        '%A5' => '¥',
6597 4
        '%A6' => '¦',
6598 4
        '%A7' => '§',
6599
        '%A8' => '¨',
6600 4
        '%A9' => '©',
6601 4
        '%AA' => 'ª',
6602 4
        '%AB' => '«',
6603 4
        '%AC' => '¬',
6604
        '%AD' => '',
6605 4
        '%AE' => '®',
6606 3
        '%AF' => '¯',
6607 3
        '%B0' => '°',
6608 3
        '%B1' => '±',
6609 3
        '%B2' => '²',
6610
        '%B3' => '³',
6611 3
        '%B4' => '´',
6612
        '%B5' => 'µ',
6613
        '%B6' => '¶',
6614
        '%B7' => '·',
6615 3
        '%B8' => '¸',
6616 3
        '%B9' => '¹',
6617
        '%BA' => 'º',
6618 4
        '%BB' => '»',
6619
        '%BC' => '¼',
6620
        '%BD' => '½',
6621
        '%BE' => '¾',
6622
        '%BF' => '¿',
6623
        '%C0' => 'À',
6624
        '%C1' => 'Á',
6625
        '%C2' => 'Â',
6626
        '%C3' => 'Ã',
6627
        '%C4' => 'Ä',
6628
        '%C5' => 'Å',
6629
        '%C6' => 'Æ',
6630
        '%C7' => 'Ç',
6631
        '%C8' => 'È',
6632
        '%C9' => 'É',
6633
        '%CA' => 'Ê',
6634
        '%CB' => 'Ë',
6635
        '%CC' => 'Ì',
6636
        '%CD' => 'Í',
6637
        '%CE' => 'Î',
6638
        '%CF' => 'Ï',
6639
        '%D0' => 'Ð',
6640
        '%D1' => 'Ñ',
6641
        '%D2' => 'Ò',
6642
        '%D3' => 'Ó',
6643
        '%D4' => 'Ô',
6644
        '%D5' => 'Õ',
6645
        '%D6' => 'Ö',
6646
        '%D7' => '×',
6647
        '%D8' => 'Ø',
6648
        '%D9' => 'Ù',
6649
        '%DA' => 'Ú',
6650
        '%DB' => 'Û',
6651
        '%DC' => 'Ü',
6652
        '%DD' => 'Ý',
6653
        '%DE' => 'Þ',
6654
        '%DF' => 'ß',
6655
        '%E0' => 'à',
6656
        '%E1' => 'á',
6657
        '%E2' => 'â',
6658
        '%E3' => 'ã',
6659
        '%E4' => 'ä',
6660
        '%E5' => 'å',
6661
        '%E6' => 'æ',
6662
        '%E7' => 'ç',
6663
        '%E8' => 'è',
6664
        '%E9' => 'é',
6665
        '%EA' => 'ê',
6666
        '%EB' => 'ë',
6667
        '%EC' => 'ì',
6668
        '%ED' => 'í',
6669
        '%EE' => 'î',
6670
        '%EF' => 'ï',
6671
        '%F0' => 'ð',
6672
        '%F1' => 'ñ',
6673
        '%F2' => 'ò',
6674
        '%F3' => 'ó',
6675
        '%F4' => 'ô',
6676
        '%F5' => 'õ',
6677
        '%F6' => 'ö',
6678
        '%F7' => '÷',
6679
        '%F8' => 'ø',
6680
        '%F9' => 'ù',
6681
        '%FA' => 'ú',
6682
        '%FB' => 'û',
6683
        '%FC' => 'ü',
6684
        '%FD' => 'ý',
6685
        '%FE' => 'þ',
6686
        '%FF' => 'ÿ',
6687
    );
6688
6689
    return $array;
6690
  }
6691
6692
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalized with "UTF8::to_utf8()"; the sequences listed in
   * "self::$utf8ToWin1252" are then replaced before the result is handed to "Xml::utf8_decode()".
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // search / replace lists are derived from the mapping once and reused on later calls
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($str);

    if (null === $search) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $utf8);

    return Xml::utf8_decode($mapped);
  }
6722
6723
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "\utf8_encode()" call, the sequences listed in "self::$cp1252ToUtf8"
   * are replaced - but only when the encoded string contains a "\xC2" byte at all.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists are derived from the mapping once and reused on later calls
    static $search = null;
    static $replace = null;

    $encoded = \utf8_encode($str);

    // without a "\xC2" byte the replacement step is skipped
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if (null === $search) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6749
6750
  /**
   * fix -> utf8-win1252 chars
   *
   * Use this function if you received an UTF-8 string that was converted from Windows-1252
   * as if it was ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to "UTF8::fix_simple_utf8()".
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6767
6768
  /**
   * Returns the whitespace table of this class ("self::$whitespaceTable").
   *
   * @see    http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author Derek E.
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6782
6783
  /**
   * Limit the number of words in a string.
   *
   * When the string holds more than $words words, it is cut after the last allowed word,
   * right-trimmed and $strAddOn is appended; otherwise the string is returned unchanged.
   *
   * @param  string $str      The input string
   * @param  int    $words    Maximum number of words; values below 1 result in an empty string
   * @param  string $strAddOn Appended only when the string was actually shortened
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $limit = (int)$words;

    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by at most $limit words incl. their trailing whitespace
    preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

    // no match, or the match already spans the whole string: nothing to cut
    $wasShortened = isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0]);

    if ($wasShortened) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
6818
6819
  /**
   * Wraps a string to a given number of characters.
   *
   * The break positions are computed by the native "wordwrap()" on a single-byte mask of the
   * input, so multi-byte characters count as one column each.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   The input string.
   * @param int    $width [optional] The column width.
   * @param string $break [optional] The line is broken using the optional break parameter.
   * @param bool   $cut   [optional] If the cut is set to true, the string is always wrapped at or
   *                      before the specified width. So if you have a word that is larger than the
   *                      given width, it is broken apart.
   *
   * @return string the given string wrapped at the specified column.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    $segments = explode($break, $str);

    if (1 === count($segments) && '' === $segments[0]) {
      return '';
    }

    // $chars holds every character (and every already existing break) as one element;
    // $mask mirrors it with one byte per element: "#" = break, " " = space, "?" = anything else
    $chars = array();
    $mask = '';
    foreach ($segments as $index => $segment) {

      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= (' ' === $char) ? ' ' : '?';
      }
    }

    // let the native function choose the break positions on the mask
    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskPos = -1;
    $breakPos = -1;

    while (false !== ($breakPos = self::strpos($mask, '#', $breakPos + 1))) {
      // copy all characters in front of the next break position
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // an existing break or a space at the break position is dropped instead of copied
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left after the last break is appended as it is
    return $wrapped . implode('', $chars);
  }
6900
6901
  /**
   * Returns the array of Unicode White Space characters of this class ("self::$whitespace").
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6910
6911
}
6912