Completed
Push — master ( d6ace5...c7d356 )
by Lars
04:09
created

UTF8::removeBOM()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 10
Bugs 4 Features 2
Metric Value
c 10
b 4
f 2
dl 0
loc 10
ccs 0
cts 5
cp 0
rs 9.4285
cc 3
eloc 5
nc 3
nop 1
crap 12
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  protected static $bom = array(
      // Keys are the raw byte signatures, values are their length in bytes.
      // The 'as "WINDOWS-1252"' variants are the same BOM bytes after they were
      // wrongly decoded as WINDOWS-1252 and re-encoded as UTF-8; they are written
      // as byte escapes so the keys do not depend on this file's own encoding.
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" ("ï»¿": one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252" ("þÿ")
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252" ("ÿþ")
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  protected static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    //HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  protected static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  protected static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
232
   * @var array
233
   */
234
  protected static $brokenUtf8ToUtf8 = array(
235
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
236
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
237
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
238
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
239
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
240
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
241
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
242
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
243
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
244
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
245
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
246
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
247
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
248
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
249
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
250
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
251
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
252
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
253
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
254
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
255
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
256
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
257
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
258
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
259
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
260
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
261
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
262
      'ü'       => 'ü',
263
      'ä'       => 'ä',
264
      'ö'       => 'ö',
265
      'Ö'       => 'Ö',
266
      'ß'       => 'ß',
267
      'Ã '       => 'à',
268
      'á'       => 'á',
269
      'â'       => 'â',
270
      'ã'       => 'ã',
271
      'ù'       => 'ù',
272
      'ú'       => 'ú',
273
      'û'       => 'û',
274
      'Ù'       => 'Ù',
275
      'Ú'       => 'Ú',
276
      'Û'       => 'Û',
277
      'Ü'       => 'Ü',
278
      'ò'       => 'ò',
279
      'ó'       => 'ó',
280
      'ô'       => 'ô',
281
      'è'       => 'è',
282
      'é'       => 'é',
283
      'ê'       => 'ê',
284
      'ë'       => 'ë',
285
      'À'       => 'À',
286
      'Á'       => 'Á',
287
      'Â'       => 'Â',
288
      'Ã'       => 'Ã',
289
      'Ä'       => 'Ä',
290
      'Ã…'       => 'Å',
291
      'Ç'       => 'Ç',
292
      'È'       => 'È',
293
      'É'       => 'É',
294
      'Ê'       => 'Ê',
295
      'Ë'       => 'Ë',
296
      'ÃŒ'       => 'Ì',
297
      'Í'       => 'Í',
298
      'ÃŽ'       => 'Î',
299
      'Ï'       => 'Ï',
300
      'Ñ'       => 'Ñ',
301
      'Ã’'       => 'Ò',
302
      'Ó'       => 'Ó',
303
      'Ô'       => 'Ô',
304
      'Õ'       => 'Õ',
305
      'Ø'       => 'Ø',
306
      'Ã¥'       => 'å',
307
      'æ'       => 'æ',
308
      'ç'       => 'ç',
309
      'ì'       => 'ì',
310
      'í'       => 'í',
311
      'î'       => 'î',
312
      'ï'       => 'ï',
313
      'ð'       => 'ð',
314
      'ñ'       => 'ñ',
315
      'õ'       => 'õ',
316
      'ø'       => 'ø',
317
      'ý'       => 'ý',
318
      'ÿ'       => 'ÿ',
319
      '€'      => '€',
320
  );
321
322
  /**
323
   * @var array
324
   */
325
  protected static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  protected static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  protected static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790 1
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792 1
      'ISO-IR-230',
793 1
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803 1
804
  /**
805
   * __construct()
806
   */
807 1
  public function __construct()
  {
    // Construction does nothing except delegate to the static
    // checkForSupport() helper; no instance state is set here.
    self::checkForSupport();
  }
811
812
  /**
813
   * Return the character at the specified position: $str[1] like functionality.
814
   *
815
   * @param    string $str A UTF-8 string.
816
   * @param    int    $pos The position of character to return.
817
   *
818
   * @return   string Single Multi-Byte character.
819
   */
820
  public static function access($str, $pos)
  {
    // The character at $pos is simply the substring of length one
    // that starts at $pos; self::substr() does the actual work.
    $length = 1;
    $character = self::substr($str, $pos, $length);

    return $character;
  }
824
825
  /**
826
   * Prepends UTF-8 BOM character to the string and returns the whole string.
827
   *
828
   * INFO: If BOM already existed there, the Input string is returned.
829
   *
830
   * @param    string $str The input string
831
   *
832
   * @return   string The output string that contains BOM
833 2
   */
834
  public static function add_bom_to_string($str)
  {
    // Anything other than a strict `false` from string_has_bom() means the
    // input is handed back unchanged.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    // No BOM reported: prepend the UTF-8 BOM supplied by self::bom().
    return self::bom() . $str;
  }
842
843
  /**
844
   * Returns the UTF-8 Byte Order Mark Character.
845
   *
846 1
   * @return string UTF-8 Byte Order Mark
847
   */
848 1
  public static function bom()
  {
    // EF BB BF: the three-byte UTF-8 encoding of U+FEFF (byte order mark).
    $utf8Bom = "\xEF\xBB\xBF";

    return $utf8Bom;
  }
852
853
  /**
854
   * @alias of UTF8::chr_map()
855
   *
856
   * @param string|array $callback
857
   * @param string       $str
858
   *
859
   * @return array
860
   */
861
  public static function callback($callback, $str)
  {
    // Pure alias: both arguments are forwarded untouched to chr_map()
    // and its result is returned as-is.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
865
866
  /**
867
   * Returns an array of all lower and upper case UTF-8 encoded characters.
868
   *
869
   * @return   array An array with lower case chars as keys and upper chars as values.
870
   */
871
  protected static function case_table()
872
  {
873
    static $case = array(
874
875
      // lower => upper
876
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
877
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
878
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
879
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
880
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
881
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
882
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
883
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
884
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
885
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
886
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
887
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
888
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
889
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
890
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
891
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
892
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
893
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
894
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
895
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
896
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
897
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
898
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
899
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
900
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
901
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
902
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
903
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
904
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
905
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
906
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
907
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
908
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
909
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
910
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
911
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
912
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
913
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
914
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
915
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
916
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
917
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
918
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
919
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
920
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
921
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
922
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
923
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
924
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
925
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
926
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
927
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
928
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
929
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
930
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
931
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
932
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
933
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
934
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
935
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
936
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
937
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
938
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
939
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
940
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
941
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
942
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
943
      "\xea\x9e\x87"     => "\xea\x9e\x86",
944
      "\xea\x9e\x85"     => "\xea\x9e\x84",
945
      "\xea\x9e\x83"     => "\xea\x9e\x82",
946
      "\xea\x9e\x81"     => "\xea\x9e\x80",
947
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
948
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
949
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
950
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
951
      "\xea\x9d\xad"     => "\xea\x9d\xac",
952
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
953
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
954
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
955
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
956
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
957
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
958
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
959
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
960
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
961
      "\xea\x9d\x99"     => "\xea\x9d\x98",
962
      "\xea\x9d\x97"     => "\xea\x9d\x96",
963
      "\xea\x9d\x95"     => "\xea\x9d\x94",
964
      "\xea\x9d\x93"     => "\xea\x9d\x92",
965
      "\xea\x9d\x91"     => "\xea\x9d\x90",
966
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
967
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
968
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
969
      "\xea\x9d\x89"     => "\xea\x9d\x88",
970
      "\xea\x9d\x87"     => "\xea\x9d\x86",
971
      "\xea\x9d\x85"     => "\xea\x9d\x84",
972
      "\xea\x9d\x83"     => "\xea\x9d\x82",
973
      "\xea\x9d\x81"     => "\xea\x9d\x80",
974
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
975
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
976
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
977
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
978
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
979
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
980
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
981
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
982
      "\xea\x9c\xad"     => "\xea\x9c\xac",
983
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
984
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
985
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
986
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
987
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
988
      "\xea\x9a\x97"     => "\xea\x9a\x96",
989
      "\xea\x9a\x95"     => "\xea\x9a\x94",
990
      "\xea\x9a\x93"     => "\xea\x9a\x92",
991
      "\xea\x9a\x91"     => "\xea\x9a\x90",
992
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
993
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
994
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
995
      "\xea\x9a\x89"     => "\xea\x9a\x88",
996
      "\xea\x9a\x87"     => "\xea\x9a\x86",
997
      "\xea\x9a\x85"     => "\xea\x9a\x84",
998
      "\xea\x9a\x83"     => "\xea\x9a\x82",
999
      "\xea\x9a\x81"     => "\xea\x9a\x80",
1000
      "\xea\x99\xad"     => "\xea\x99\xac",
1001
      "\xea\x99\xab"     => "\xea\x99\xaa",
1002
      "\xea\x99\xa9"     => "\xea\x99\xa8",
1003
      "\xea\x99\xa7"     => "\xea\x99\xa6",
1004
      "\xea\x99\xa5"     => "\xea\x99\xa4",
1005
      "\xea\x99\xa3"     => "\xea\x99\xa2",
1006
      "\xea\x99\x9f"     => "\xea\x99\x9e",
1007
      "\xea\x99\x9d"     => "\xea\x99\x9c",
1008
      "\xea\x99\x9b"     => "\xea\x99\x9a",
1009
      "\xea\x99\x99"     => "\xea\x99\x98",
1010
      "\xea\x99\x97"     => "\xea\x99\x96",
1011
      "\xea\x99\x95"     => "\xea\x99\x94",
1012
      "\xea\x99\x93"     => "\xea\x99\x92",
1013
      "\xea\x99\x91"     => "\xea\x99\x90",
1014
      "\xea\x99\x8f"     => "\xea\x99\x8e",
1015
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1016
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1017
      "\xea\x99\x89"     => "\xea\x99\x88",
1018
      "\xea\x99\x87"     => "\xea\x99\x86",
1019
      "\xea\x99\x85"     => "\xea\x99\x84",
1020
      "\xea\x99\x83"     => "\xea\x99\x82",
1021
      "\xea\x99\x81"     => "\xea\x99\x80",
1022
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1023
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1024
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1025
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1026
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1027
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1028
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1029
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1030
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1031
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1032
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1033
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1034
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1035
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1036
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1037
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1038
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1039
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1040
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1041
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1042
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1043
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1044
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1045
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1046
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1047
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1048
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1049
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1050
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1051
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1052
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1053
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1054
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1055
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1056
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1057
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1058
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1059
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1060
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1061
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1062
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1063
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1064
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1065
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1066
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1067
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1068
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1069
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1070
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1071
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1072
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1073
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1074
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1075
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1076
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1077
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1078
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1079
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1080
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1081
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1082
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1083
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1084
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1085
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1086
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1087
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1088
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1089
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1090
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1091
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1092
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1093
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1094
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1095
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1096
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1097
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1098
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1099
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1100
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1101
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1102
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1103
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1104
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1105
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1106
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1107
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1108
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1109
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1110
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1111
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1112
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1113
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1114
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1115
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1116
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1117
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1118
      "\xe2\xb1\xa5"     => "\xc8\xba",
1119
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1120
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1121
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1122
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1123
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1124
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1125
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1126
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1127
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1128
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1129
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1130
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1131
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1132
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1133
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1134
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1135
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1136
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1137
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1138
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1139
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1140
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1141
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1142
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1143
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1144
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1145
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1146
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1147
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1148
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1149
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1150
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1151
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1152
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1153
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1154
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1155
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1156
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1157
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1158
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1159
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1160
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1161
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1162
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1163
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1164
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1165
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1166
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1167
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1168
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1169
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1170
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1171
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1172
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1173
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1174
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1175
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1176
      "\xe1\xbe\xbe"     => "\xce\x99",
1177
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1178
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1179
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1180
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1181
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1182
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1183
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1184
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1185
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1186
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1187
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1188
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1189
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1190
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1191
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1192
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1193
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1194
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1195
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1196
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1197
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1198
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1199
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1200
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1201
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1202
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1203
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1204
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1205
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1206
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1207
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1208
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1209
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1210
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1211
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1212
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1213
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1214
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1215
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1216
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1217
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1218
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1219
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1220
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1221
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1222
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1223
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1224
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1225
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1226
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1227
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1228
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1229
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1230
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1231
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1232
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1233
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1234
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1235
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1236
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1237
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1238
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1239
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1240
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1241
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1242
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1243
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1244
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1245
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1246
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1247
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1248
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1249
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1250
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1251
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1252
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1253
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1254
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1255
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1256
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1257
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1258
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1259
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1260
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1261
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1262
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1263
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1264
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1265
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1266
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1267
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1268
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1269
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1270
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1271
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1272
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1273
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1274
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1275
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1276
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1277
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1278
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1279
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1280
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1281
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1282
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1283
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1284
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1285
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1286
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1287
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1288
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1289
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1290
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1291
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1292
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1293
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1294
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1295
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1296
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1297
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1298
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1299
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1300
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1301
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1302
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1303
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1304
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1305
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1306
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1307
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1308
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1309
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1310
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1311
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1312
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1313
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1314
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1315
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1316
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1317
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1318
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1319
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1320
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1321
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1322
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1323
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1324
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1325
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1326
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1327
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1328
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1329
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1330
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1331
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1332
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1333
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1334
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1335
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1336
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1337
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1338
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1339
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1340
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1341
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1342
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1343
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1344
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1345
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1346
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1347
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1348
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1349
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1350
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1351
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1352
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1353
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1354
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1355
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1356
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1357
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1358
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1359
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1360
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1361
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1362
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1363
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1364
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1365
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1366
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1367
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1368
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1369
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1370
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1371
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1372
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1373
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1374
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1375
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1376
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1377
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1378
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1379
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1380
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1381
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1382
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1383
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1384
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1385
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1386
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1387
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1388
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1389
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1390
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1391
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1392
      "\xd6\x86"         => "\xd5\x96",
1393
      "\xd6\x85"         => "\xd5\x95",
1394
      "\xd6\x84"         => "\xd5\x94",
1395
      "\xd6\x83"         => "\xd5\x93",
1396
      "\xd6\x82"         => "\xd5\x92",
1397
      "\xd6\x81"         => "\xd5\x91",
1398
      "\xd6\x80"         => "\xd5\x90",
1399
      "\xd5\xbf"         => "\xd5\x8f",
1400
      "\xd5\xbe"         => "\xd5\x8e",
1401
      "\xd5\xbd"         => "\xd5\x8d",
1402
      "\xd5\xbc"         => "\xd5\x8c",
1403
      "\xd5\xbb"         => "\xd5\x8b",
1404
      "\xd5\xba"         => "\xd5\x8a",
1405
      "\xd5\xb9"         => "\xd5\x89",
1406
      "\xd5\xb8"         => "\xd5\x88",
1407
      "\xd5\xb7"         => "\xd5\x87",
1408
      "\xd5\xb6"         => "\xd5\x86",
1409
      "\xd5\xb5"         => "\xd5\x85",
1410
      "\xd5\xb4"         => "\xd5\x84",
1411
      "\xd5\xb3"         => "\xd5\x83",
1412
      "\xd5\xb2"         => "\xd5\x82",
1413
      "\xd5\xb1"         => "\xd5\x81",
1414
      "\xd5\xb0"         => "\xd5\x80",
1415
      "\xd5\xaf"         => "\xd4\xbf",
1416
      "\xd5\xae"         => "\xd4\xbe",
1417
      "\xd5\xad"         => "\xd4\xbd",
1418
      "\xd5\xac"         => "\xd4\xbc",
1419
      "\xd5\xab"         => "\xd4\xbb",
1420
      "\xd5\xaa"         => "\xd4\xba",
1421
      "\xd5\xa9"         => "\xd4\xb9",
1422
      "\xd5\xa8"         => "\xd4\xb8",
1423
      "\xd5\xa7"         => "\xd4\xb7",
1424
      "\xd5\xa6"         => "\xd4\xb6",
1425
      "\xd5\xa5"         => "\xd4\xb5",
1426
      "\xd5\xa4"         => "\xd4\xb4",
1427
      "\xd5\xa3"         => "\xd4\xb3",
1428
      "\xd5\xa2"         => "\xd4\xb2",
1429
      "\xd5\xa1"         => "\xd4\xb1",
1430
      "\xd4\xa5"         => "\xd4\xa4",
1431
      "\xd4\xa3"         => "\xd4\xa2",
1432
      "\xd4\xa1"         => "\xd4\xa0",
1433
      "\xd4\x9f"         => "\xd4\x9e",
1434
      "\xd4\x9d"         => "\xd4\x9c",
1435
      "\xd4\x9b"         => "\xd4\x9a",
1436
      "\xd4\x99"         => "\xd4\x98",
1437
      "\xd4\x97"         => "\xd4\x96",
1438
      "\xd4\x95"         => "\xd4\x94",
1439
      "\xd4\x93"         => "\xd4\x92",
1440
      "\xd4\x91"         => "\xd4\x90",
1441
      "\xd4\x8f"         => "\xd4\x8e",
1442
      "\xd4\x8d"         => "\xd4\x8c",
1443
      "\xd4\x8b"         => "\xd4\x8a",
1444
      "\xd4\x89"         => "\xd4\x88",
1445
      "\xd4\x87"         => "\xd4\x86",
1446
      "\xd4\x85"         => "\xd4\x84",
1447
      "\xd4\x83"         => "\xd4\x82",
1448
      "\xd4\x81"         => "\xd4\x80",
1449
      "\xd3\xbf"         => "\xd3\xbe",
1450
      "\xd3\xbd"         => "\xd3\xbc",
1451
      "\xd3\xbb"         => "\xd3\xba",
1452
      "\xd3\xb9"         => "\xd3\xb8",
1453
      "\xd3\xb7"         => "\xd3\xb6",
1454
      "\xd3\xb5"         => "\xd3\xb4",
1455
      "\xd3\xb3"         => "\xd3\xb2",
1456
      "\xd3\xb1"         => "\xd3\xb0",
1457
      "\xd3\xaf"         => "\xd3\xae",
1458
      "\xd3\xad"         => "\xd3\xac",
1459
      "\xd3\xab"         => "\xd3\xaa",
1460
      "\xd3\xa9"         => "\xd3\xa8",
1461
      "\xd3\xa7"         => "\xd3\xa6",
1462
      "\xd3\xa5"         => "\xd3\xa4",
1463
      "\xd3\xa3"         => "\xd3\xa2",
1464
      "\xd3\xa1"         => "\xd3\xa0",
1465
      "\xd3\x9f"         => "\xd3\x9e",
1466
      "\xd3\x9d"         => "\xd3\x9c",
1467
      "\xd3\x9b"         => "\xd3\x9a",
1468
      "\xd3\x99"         => "\xd3\x98",
1469
      "\xd3\x97"         => "\xd3\x96",
1470
      "\xd3\x95"         => "\xd3\x94",
1471
      "\xd3\x93"         => "\xd3\x92",
1472
      "\xd3\x91"         => "\xd3\x90",
1473
      "\xd3\x8f"         => "\xd3\x80",
1474
      "\xd3\x8e"         => "\xd3\x8d",
1475
      "\xd3\x8c"         => "\xd3\x8b",
1476
      "\xd3\x8a"         => "\xd3\x89",
1477
      "\xd3\x88"         => "\xd3\x87",
1478
      "\xd3\x86"         => "\xd3\x85",
1479
      "\xd3\x84"         => "\xd3\x83",
1480
      "\xd3\x82"         => "\xd3\x81",
1481
      "\xd2\xbf"         => "\xd2\xbe",
1482
      "\xd2\xbd"         => "\xd2\xbc",
1483
      "\xd2\xbb"         => "\xd2\xba",
1484
      "\xd2\xb9"         => "\xd2\xb8",
1485
      "\xd2\xb7"         => "\xd2\xb6",
1486
      "\xd2\xb5"         => "\xd2\xb4",
1487
      "\xd2\xb3"         => "\xd2\xb2",
1488
      "\xd2\xb1"         => "\xd2\xb0",
1489
      "\xd2\xaf"         => "\xd2\xae",
1490
      "\xd2\xad"         => "\xd2\xac",
1491
      "\xd2\xab"         => "\xd2\xaa",
1492
      "\xd2\xa9"         => "\xd2\xa8",
1493
      "\xd2\xa7"         => "\xd2\xa6",
1494
      "\xd2\xa5"         => "\xd2\xa4",
1495
      "\xd2\xa3"         => "\xd2\xa2",
1496
      "\xd2\xa1"         => "\xd2\xa0",
1497
      "\xd2\x9f"         => "\xd2\x9e",
1498
      "\xd2\x9d"         => "\xd2\x9c",
1499
      "\xd2\x9b"         => "\xd2\x9a",
1500
      "\xd2\x99"         => "\xd2\x98",
1501
      "\xd2\x97"         => "\xd2\x96",
1502
      "\xd2\x95"         => "\xd2\x94",
1503
      "\xd2\x93"         => "\xd2\x92",
1504
      "\xd2\x91"         => "\xd2\x90",
1505
      "\xd2\x8f"         => "\xd2\x8e",
1506
      "\xd2\x8d"         => "\xd2\x8c",
1507
      "\xd2\x8b"         => "\xd2\x8a",
1508
      "\xd2\x81"         => "\xd2\x80",
1509
      "\xd1\xbf"         => "\xd1\xbe",
1510
      "\xd1\xbd"         => "\xd1\xbc",
1511
      "\xd1\xbb"         => "\xd1\xba",
1512
      "\xd1\xb9"         => "\xd1\xb8",
1513
      "\xd1\xb7"         => "\xd1\xb6",
1514
      "\xd1\xb5"         => "\xd1\xb4",
1515
      "\xd1\xb3"         => "\xd1\xb2",
1516
      "\xd1\xb1"         => "\xd1\xb0",
1517
      "\xd1\xaf"         => "\xd1\xae",
1518
      "\xd1\xad"         => "\xd1\xac",
1519
      "\xd1\xab"         => "\xd1\xaa",
1520
      "\xd1\xa9"         => "\xd1\xa8",
1521
      "\xd1\xa7"         => "\xd1\xa6",
1522
      "\xd1\xa5"         => "\xd1\xa4",
1523
      "\xd1\xa3"         => "\xd1\xa2",
1524
      "\xd1\xa1"         => "\xd1\xa0",
1525
      "\xd1\x9f"         => "\xd0\x8f",
1526
      "\xd1\x9e"         => "\xd0\x8e",
1527
      "\xd1\x9d"         => "\xd0\x8d",
1528
      "\xd1\x9c"         => "\xd0\x8c",
1529
      "\xd1\x9b"         => "\xd0\x8b",
1530
      "\xd1\x9a"         => "\xd0\x8a",
1531
      "\xd1\x99"         => "\xd0\x89",
1532
      "\xd1\x98"         => "\xd0\x88",
1533
      "\xd1\x97"         => "\xd0\x87",
1534
      "\xd1\x96"         => "\xd0\x86",
1535
      "\xd1\x95"         => "\xd0\x85",
1536
      "\xd1\x94"         => "\xd0\x84",
1537
      "\xd1\x93"         => "\xd0\x83",
1538
      "\xd1\x92"         => "\xd0\x82",
1539
      "\xd1\x91"         => "\xd0\x81",
1540
      "\xd1\x90"         => "\xd0\x80",
1541
      "\xd1\x8f"         => "\xd0\xaf",
1542
      "\xd1\x8e"         => "\xd0\xae",
1543
      "\xd1\x8d"         => "\xd0\xad",
1544
      "\xd1\x8c"         => "\xd0\xac",
1545
      "\xd1\x8b"         => "\xd0\xab",
1546
      "\xd1\x8a"         => "\xd0\xaa",
1547
      "\xd1\x89"         => "\xd0\xa9",
1548
      "\xd1\x88"         => "\xd0\xa8",
1549
      "\xd1\x87"         => "\xd0\xa7",
1550
      "\xd1\x86"         => "\xd0\xa6",
1551
      "\xd1\x85"         => "\xd0\xa5",
1552
      "\xd1\x84"         => "\xd0\xa4",
1553
      "\xd1\x83"         => "\xd0\xa3",
1554
      "\xd1\x82"         => "\xd0\xa2",
1555
      "\xd1\x81"         => "\xd0\xa1",
1556
      "\xd1\x80"         => "\xd0\xa0",
1557
      "\xd0\xbf"         => "\xd0\x9f",
1558
      "\xd0\xbe"         => "\xd0\x9e",
1559
      "\xd0\xbd"         => "\xd0\x9d",
1560
      "\xd0\xbc"         => "\xd0\x9c",
1561
      "\xd0\xbb"         => "\xd0\x9b",
1562
      "\xd0\xba"         => "\xd0\x9a",
1563
      "\xd0\xb9"         => "\xd0\x99",
1564
      "\xd0\xb8"         => "\xd0\x98",
1565
      "\xd0\xb7"         => "\xd0\x97",
1566
      "\xd0\xb6"         => "\xd0\x96",
1567
      "\xd0\xb5"         => "\xd0\x95",
1568
      "\xd0\xb4"         => "\xd0\x94",
1569
      "\xd0\xb3"         => "\xd0\x93",
1570
      "\xd0\xb2"         => "\xd0\x92",
1571
      "\xd0\xb1"         => "\xd0\x91",
1572
      "\xd0\xb0"         => "\xd0\x90",
1573
      "\xcf\xbb"         => "\xcf\xba",
1574
      "\xcf\xb8"         => "\xcf\xb7",
1575
      "\xcf\xb5"         => "\xce\x95",
1576
      "\xcf\xb2"         => "\xcf\xb9",
1577
      "\xcf\xb1"         => "\xce\xa1",
1578
      "\xcf\xb0"         => "\xce\x9a",
1579
      "\xcf\xaf"         => "\xcf\xae",
1580
      "\xcf\xad"         => "\xcf\xac",
1581
      "\xcf\xab"         => "\xcf\xaa",
1582
      "\xcf\xa9"         => "\xcf\xa8",
1583
      "\xcf\xa7"         => "\xcf\xa6",
1584
      "\xcf\xa5"         => "\xcf\xa4",
1585
      "\xcf\xa3"         => "\xcf\xa2",
1586
      "\xcf\xa1"         => "\xcf\xa0",
1587
      "\xcf\x9f"         => "\xcf\x9e",
1588
      "\xcf\x9d"         => "\xcf\x9c",
1589
      "\xcf\x9b"         => "\xcf\x9a",
1590
      "\xcf\x99"         => "\xcf\x98",
1591
      "\xcf\x97"         => "\xcf\x8f",
1592
      "\xcf\x96"         => "\xce\xa0",
1593
      "\xcf\x95"         => "\xce\xa6",
1594
      "\xcf\x91"         => "\xce\x98",
1595
      "\xcf\x90"         => "\xce\x92",
1596
      "\xcf\x8e"         => "\xce\x8f",
1597
      "\xcf\x8d"         => "\xce\x8e",
1598
      "\xcf\x8c"         => "\xce\x8c",
1599
      "\xcf\x8b"         => "\xce\xab",
1600
      "\xcf\x8a"         => "\xce\xaa",
1601
      "\xcf\x89"         => "\xce\xa9",
1602
      "\xcf\x88"         => "\xce\xa8",
1603
      "\xcf\x87"         => "\xce\xa7",
1604
      "\xcf\x86"         => "\xce\xa6",
1605
      "\xcf\x85"         => "\xce\xa5",
1606
      "\xcf\x84"         => "\xce\xa4",
1607
      "\xcf\x83"         => "\xce\xa3",
1608
      "\xcf\x82"         => "\xce\xa3",
1609
      "\xcf\x81"         => "\xce\xa1",
1610
      "\xcf\x80"         => "\xce\xa0",
1611
      "\xce\xbf"         => "\xce\x9f",
1612
      "\xce\xbe"         => "\xce\x9e",
1613
      "\xce\xbd"         => "\xce\x9d",
1614
      "\xce\xbc"         => "\xce\x9c",
1615
      "\xce\xbb"         => "\xce\x9b",
1616
      "\xce\xba"         => "\xce\x9a",
1617
      "\xce\xb9"         => "\xce\x99",
1618
      "\xce\xb8"         => "\xce\x98",
1619
      "\xce\xb7"         => "\xce\x97",
1620
      "\xce\xb6"         => "\xce\x96",
1621
      "\xce\xb5"         => "\xce\x95",
1622
      "\xce\xb4"         => "\xce\x94",
1623
      "\xce\xb3"         => "\xce\x93",
1624
      "\xce\xb2"         => "\xce\x92",
1625
      "\xce\xb1"         => "\xce\x91",
1626
      "\xce\xaf"         => "\xce\x8a",
1627
      "\xce\xae"         => "\xce\x89",
1628
      "\xce\xad"         => "\xce\x88",
1629
      "\xce\xac"         => "\xce\x86",
1630
      "\xcd\xbd"         => "\xcf\xbf",
1631
      "\xcd\xbc"         => "\xcf\xbe",
1632
      "\xcd\xbb"         => "\xcf\xbd",
1633
      "\xcd\xb7"         => "\xcd\xb6",
1634
      "\xcd\xb3"         => "\xcd\xb2",
1635
      "\xcd\xb1"         => "\xcd\xb0",
1636
      "\xca\x92"         => "\xc6\xb7",
1637
      "\xca\x8c"         => "\xc9\x85",
1638
      "\xca\x8b"         => "\xc6\xb2",
1639
      "\xca\x8a"         => "\xc6\xb1",
1640
      "\xca\x89"         => "\xc9\x84",
1641
      "\xca\x88"         => "\xc6\xae",
1642
      "\xca\x83"         => "\xc6\xa9",
1643
      "\xca\x80"         => "\xc6\xa6",
1644
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1645
      "\xc9\xb5"         => "\xc6\x9f",
1646
      "\xc9\xb2"         => "\xc6\x9d",
1647
      "\xc9\xb1"         => "\xe2\xb1\xae",
1648
      "\xc9\xaf"         => "\xc6\x9c",
1649
      "\xc9\xab"         => "\xe2\xb1\xa2",
1650
      "\xc9\xa9"         => "\xc6\x96",
1651
      "\xc9\xa8"         => "\xc6\x97",
1652
      "\xc9\xa5"         => "\xea\x9e\x8d",
1653
      "\xc9\xa3"         => "\xc6\x94",
1654
      "\xc9\xa0"         => "\xc6\x93",
1655
      "\xc9\x9b"         => "\xc6\x90",
1656
      "\xc9\x99"         => "\xc6\x8f",
1657
      "\xc9\x97"         => "\xc6\x8a",
1658
      "\xc9\x96"         => "\xc6\x89",
1659
      "\xc9\x94"         => "\xc6\x86",
1660
      "\xc9\x93"         => "\xc6\x81",
1661
      "\xc9\x92"         => "\xe2\xb1\xb0",
1662
      "\xc9\x91"         => "\xe2\xb1\xad",
1663
      "\xc9\x90"         => "\xe2\xb1\xaf",
1664
      "\xc9\x8f"         => "\xc9\x8e",
1665
      "\xc9\x8d"         => "\xc9\x8c",
1666
      "\xc9\x8b"         => "\xc9\x8a",
1667
      "\xc9\x89"         => "\xc9\x88",
1668
      "\xc9\x87"         => "\xc9\x86",
1669
      "\xc9\x82"         => "\xc9\x81",
1670
      "\xc9\x80"         => "\xe2\xb1\xbf",
1671
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1672
      "\xc8\xbc"         => "\xc8\xbb",
1673
      "\xc8\xb3"         => "\xc8\xb2",
1674
      "\xc8\xb1"         => "\xc8\xb0",
1675
      "\xc8\xaf"         => "\xc8\xae",
1676
      "\xc8\xad"         => "\xc8\xac",
1677
      "\xc8\xab"         => "\xc8\xaa",
1678
      "\xc8\xa9"         => "\xc8\xa8",
1679
      "\xc8\xa7"         => "\xc8\xa6",
1680
      "\xc8\xa5"         => "\xc8\xa4",
1681
      "\xc8\xa3"         => "\xc8\xa2",
1682
      "\xc8\x9f"         => "\xc8\x9e",
1683
      "\xc8\x9d"         => "\xc8\x9c",
1684
      "\xc8\x9b"         => "\xc8\x9a",
1685
      "\xc8\x99"         => "\xc8\x98",
1686
      "\xc8\x97"         => "\xc8\x96",
1687
      "\xc8\x95"         => "\xc8\x94",
1688
      "\xc8\x93"         => "\xc8\x92",
1689
      "\xc8\x91"         => "\xc8\x90",
1690
      "\xc8\x8f"         => "\xc8\x8e",
1691
      "\xc8\x8d"         => "\xc8\x8c",
1692
      "\xc8\x8b"         => "\xc8\x8a",
1693
      "\xc8\x89"         => "\xc8\x88",
1694
      "\xc8\x87"         => "\xc8\x86",
1695
      "\xc8\x85"         => "\xc8\x84",
1696
      "\xc8\x83"         => "\xc8\x82",
1697
      "\xc8\x81"         => "\xc8\x80",
1698
      "\xc7\xbf"         => "\xc7\xbe",
1699
      "\xc7\xbd"         => "\xc7\xbc",
1700
      "\xc7\xbb"         => "\xc7\xba",
1701
      "\xc7\xb9"         => "\xc7\xb8",
1702
      "\xc7\xb5"         => "\xc7\xb4",
1703
      "\xc7\xb3"         => "\xc7\xb2",
1704
      "\xc7\xaf"         => "\xc7\xae",
1705
      "\xc7\xad"         => "\xc7\xac",
1706
      "\xc7\xab"         => "\xc7\xaa",
1707
      "\xc7\xa9"         => "\xc7\xa8",
1708
      "\xc7\xa7"         => "\xc7\xa6",
1709
      "\xc7\xa5"         => "\xc7\xa4",
1710
      "\xc7\xa3"         => "\xc7\xa2",
1711
      "\xc7\xa1"         => "\xc7\xa0",
1712
      "\xc7\x9f"         => "\xc7\x9e",
1713
      "\xc7\x9d"         => "\xc6\x8e",
1714
      "\xc7\x9c"         => "\xc7\x9b",
1715
      "\xc7\x9a"         => "\xc7\x99",
1716
      "\xc7\x98"         => "\xc7\x97",
1717
      "\xc7\x96"         => "\xc7\x95",
1718
      "\xc7\x94"         => "\xc7\x93",
1719
      "\xc7\x92"         => "\xc7\x91",
1720
      "\xc7\x90"         => "\xc7\x8f",
1721
      "\xc7\x8e"         => "\xc7\x8d",
1722
      "\xc7\x8c"         => "\xc7\x8b",
1723
      "\xc7\x89"         => "\xc7\x88",
1724
      "\xc7\x86"         => "\xc7\x85",
1725
      "\xc6\xbf"         => "\xc7\xb7",
1726
      "\xc6\xbd"         => "\xc6\xbc",
1727
      "\xc6\xb9"         => "\xc6\xb8",
1728
      "\xc6\xb6"         => "\xc6\xb5",
1729
      "\xc6\xb4"         => "\xc6\xb3",
1730
      "\xc6\xb0"         => "\xc6\xaf",
1731
      "\xc6\xad"         => "\xc6\xac",
1732
      "\xc6\xa8"         => "\xc6\xa7",
1733
      "\xc6\xa5"         => "\xc6\xa4",
1734
      "\xc6\xa3"         => "\xc6\xa2",
1735
      "\xc6\xa1"         => "\xc6\xa0",
1736
      "\xc6\x9e"         => "\xc8\xa0",
1737
      "\xc6\x9a"         => "\xc8\xbd",
1738
      "\xc6\x99"         => "\xc6\x98",
1739
      "\xc6\x95"         => "\xc7\xb6",
1740
      "\xc6\x92"         => "\xc6\x91",
1741
      "\xc6\x8c"         => "\xc6\x8b",
1742
      "\xc6\x88"         => "\xc6\x87",
1743
      "\xc6\x85"         => "\xc6\x84",
1744
      "\xc6\x83"         => "\xc6\x82",
1745
      "\xc6\x80"         => "\xc9\x83",
1746
      "\xc5\xbf"         => "\x53",
1747
      "\xc5\xbe"         => "\xc5\xbd",
1748
      "\xc5\xbc"         => "\xc5\xbb",
1749
      "\xc5\xba"         => "\xc5\xb9",
1750
      "\xc5\xb7"         => "\xc5\xb6",
1751
      "\xc5\xb5"         => "\xc5\xb4",
1752
      "\xc5\xb3"         => "\xc5\xb2",
1753
      "\xc5\xb1"         => "\xc5\xb0",
1754
      "\xc5\xaf"         => "\xc5\xae",
1755
      "\xc5\xad"         => "\xc5\xac",
1756
      "\xc5\xab"         => "\xc5\xaa",
1757
      "\xc5\xa9"         => "\xc5\xa8",
1758
      "\xc5\xa7"         => "\xc5\xa6",
1759
      "\xc5\xa5"         => "\xc5\xa4",
1760
      "\xc5\xa3"         => "\xc5\xa2",
1761
      "\xc5\xa1"         => "\xc5\xa0",
1762
      "\xc5\x9f"         => "\xc5\x9e",
1763
      "\xc5\x9d"         => "\xc5\x9c",
1764
      "\xc5\x9b"         => "\xc5\x9a",
1765
      "\xc5\x99"         => "\xc5\x98",
1766
      "\xc5\x97"         => "\xc5\x96",
1767
      "\xc5\x95"         => "\xc5\x94",
1768
      "\xc5\x93"         => "\xc5\x92",
1769
      "\xc5\x91"         => "\xc5\x90",
1770
      "\xc5\x8f"         => "\xc5\x8e",
1771
      "\xc5\x8d"         => "\xc5\x8c",
1772
      "\xc5\x8b"         => "\xc5\x8a",
1773
      "\xc5\x88"         => "\xc5\x87",
1774
      "\xc5\x86"         => "\xc5\x85",
1775
      "\xc5\x84"         => "\xc5\x83",
1776
      "\xc5\x82"         => "\xc5\x81",
1777
      "\xc5\x80"         => "\xc4\xbf",
1778
      "\xc4\xbe"         => "\xc4\xbd",
1779
      "\xc4\xbc"         => "\xc4\xbb",
1780
      "\xc4\xba"         => "\xc4\xb9",
1781
      "\xc4\xb7"         => "\xc4\xb6",
1782
      "\xc4\xb5"         => "\xc4\xb4",
1783
      "\xc4\xb3"         => "\xc4\xb2",
1784
      "\xc4\xb1"         => "\x49",
1785
      "\xc4\xaf"         => "\xc4\xae",
1786
      "\xc4\xad"         => "\xc4\xac",
1787
      "\xc4\xab"         => "\xc4\xaa",
1788
      "\xc4\xa9"         => "\xc4\xa8",
1789
      "\xc4\xa7"         => "\xc4\xa6",
1790
      "\xc4\xa5"         => "\xc4\xa4",
1791
      "\xc4\xa3"         => "\xc4\xa2",
1792
      "\xc4\xa1"         => "\xc4\xa0",
1793
      "\xc4\x9f"         => "\xc4\x9e",
1794
      "\xc4\x9d"         => "\xc4\x9c",
1795
      "\xc4\x9b"         => "\xc4\x9a",
1796
      "\xc4\x99"         => "\xc4\x98",
1797
      "\xc4\x97"         => "\xc4\x96",
1798
      "\xc4\x95"         => "\xc4\x94",
1799
      "\xc4\x93"         => "\xc4\x92",
1800
      "\xc4\x91"         => "\xc4\x90",
1801
      "\xc4\x8f"         => "\xc4\x8e",
1802
      "\xc4\x8d"         => "\xc4\x8c",
1803
      "\xc4\x8b"         => "\xc4\x8a",
1804
      "\xc4\x89"         => "\xc4\x88",
1805
      "\xc4\x87"         => "\xc4\x86",
1806
      "\xc4\x85"         => "\xc4\x84",
1807
      "\xc4\x83"         => "\xc4\x82",
1808
      "\xc4\x81"         => "\xc4\x80",
1809
      "\xc3\xbf"         => "\xc5\xb8",
1810
      "\xc3\xbe"         => "\xc3\x9e",
1811
      "\xc3\xbd"         => "\xc3\x9d",
1812
      "\xc3\xbc"         => "\xc3\x9c",
1813
      "\xc3\xbb"         => "\xc3\x9b",
1814
      "\xc3\xba"         => "\xc3\x9a",
1815
      "\xc3\xb9"         => "\xc3\x99",
1816
      "\xc3\xb8"         => "\xc3\x98",
1817
      "\xc3\xb6"         => "\xc3\x96",
1818
      "\xc3\xb5"         => "\xc3\x95",
1819
      "\xc3\xb4"         => "\xc3\x94",
1820
      "\xc3\xb3"         => "\xc3\x93",
1821
      "\xc3\xb2"         => "\xc3\x92",
1822
      "\xc3\xb1"         => "\xc3\x91",
1823
      "\xc3\xb0"         => "\xc3\x90",
1824
      "\xc3\xaf"         => "\xc3\x8f",
1825
      "\xc3\xae"         => "\xc3\x8e",
1826
      "\xc3\xad"         => "\xc3\x8d",
1827
      "\xc3\xac"         => "\xc3\x8c",
1828
      "\xc3\xab"         => "\xc3\x8b",
1829
      "\xc3\xaa"         => "\xc3\x8a",
1830
      "\xc3\xa9"         => "\xc3\x89",
1831
      "\xc3\xa8"         => "\xc3\x88",
1832
      "\xc3\xa7"         => "\xc3\x87",
1833
      "\xc3\xa6"         => "\xc3\x86",
1834
      "\xc3\xa5"         => "\xc3\x85",
1835
      "\xc3\xa4"         => "\xc3\x84",
1836
      "\xc3\xa3"         => "\xc3\x83",
1837
      "\xc3\xa2"         => "\xc3\x82",
1838
      "\xc3\xa1"         => "\xc3\x81",
1839
      "\xc3\xa0"         => "\xc3\x80",
1840
      "\xc2\xb5"         => "\xce\x9c",
1841
      "\x7a"             => "\x5a",
1842
      "\x79"             => "\x59",
1843
      "\x78"             => "\x58",
1844
      "\x77"             => "\x57",
1845
      "\x76"             => "\x56",
1846
      "\x75"             => "\x55",
1847
      "\x74"             => "\x54",
1848
      "\x73"             => "\x53",
1849
      "\x72"             => "\x52",
1850
      "\x71"             => "\x51",
1851
      "\x70"             => "\x50",
1852
      "\x6f"             => "\x4f",
1853
      "\x6e"             => "\x4e",
1854
      "\x6d"             => "\x4d",
1855
      "\x6c"             => "\x4c",
1856
      "\x6b"             => "\x4b",
1857
      "\x6a"             => "\x4a",
1858
      "\x69"             => "\x49",
1859
      "\x68"             => "\x48",
1860
      "\x67"             => "\x47",
1861 157
      "\x66"             => "\x46",
1862
      "\x65"             => "\x45",
1863 157
      "\x64"             => "\x44",
1864
      "\x63"             => "\x43",
1865 1
      "\x62"             => "\x42",
1866 1
      "\x61"             => "\x41",
1867 1
1868 1
    );
1869 1
1870 157
    return $case;
1871
  }
1872
1873
  /**
   * Detects which UTF-8 related features are available and caches the result.
   *
   * The probes run only once: as soon as the "mbstring" entry exists in the
   * support cache, further calls return immediately.
   *
   * INFO: You don't need to run it manually, the methods that rely on it call it themselves.
   */
  public static function checkForSupport()
  {
    // already probed, keep the cached flags
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1889
1890 8
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * If the "IntlChar" support flag is set, the value is handed to "IntlChar::chr()" as it is.
   * Otherwise a value that is not an integer is converted with "UTF8::hex_to_int()" first
   * and the character is built by decoding a numeric HTML entity.
   *
   * @param    int $code_point The code point for which to generate a character.
   *
   * @return   string Multi-Byte character, returns empty string on failure to encode.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      // "IntlChar::chr()" signals a failure with null, but this method promises a string
      $char = \IntlChar::chr($code_point);

      return $char === null ? '' : $char;
    }

    $i = (int)$code_point;

    // not a plain integer: try to read it as a hexadecimal code point
    if ($i !== $code_point) {
      $i = (int)self::hex_to_int($code_point);
    }

    if (!$i) {
      return '';
    }

    return self::html_entity_decode("&#{$i};", ENT_QUOTES);
  }
1917
1918
  /**
   * Runs a callback over every element "UTF8::split()" returns for the given string.
   *
   * @param  string|array $callback The callback function, in any form "array_map()" accepts.
   * @param  string       $str      UTF-8 string to run callback on.
   *
   * @return array The collected return values of the callback.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1932
1933
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character, an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // test for "no bytes at all" instead of truthiness: the string "0" is falsy but holds one character
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1953
1954 1
  /**
   * Get a decimal code representation of a specific character.
   *
   * Only the first character of the input is decoded: the lead byte decides how many
   * continuation bytes (up to three) are read after it.
   *
   * @param   string $chr The input character
   *
   * @return  int The decoded code, 0 for an empty string.
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    // an empty string has no lead byte; reading $chr[0] would only raise an "Uninitialized string offset" notice
    if (!isset($chr[0])) {
      return 0;
    }

    $code = self::ord($chr[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // strip the length marker from the lead byte and remember the sequence length
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    // append the six payload bits of every continuation byte
    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      $code = ($code << 6) + (self::ord($chr[$i - 1]) & ~0x80);
    }

    return $code;
  }
1993 1
1994
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character
   * @param    string $pfix The prefix handed to "UTF8::int_to_hex()", "U+" by default.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $codePoint = self::ord($chr);

    return self::int_to_hex($codePoint, $pfix);
  }
2006
2007
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending character(s).
   *
   * INFO: The chunks are glued together with $end, so nothing is appended after the last chunk.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
2020
2021
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Invalid bytes, the replacement character and invisible characters are always removed,
   * the remaining steps only run if their flag is exactly true.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to strip the BOM via "UTF8::removeBOM()"
   * @param bool   $normalize_whitespace    set true, to run "UTF8::normalize_whitespace()"
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 matches runs of well-formed sequences, the other groups match a single stray byte
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // only group 1 is written back, so every stray byte is dropped
    $str = preg_replace($utf8Pattern, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    // optional steps, each one is opt-in
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
2067
2068 3
  /**
   * Cleans up a string: repairs simple encoding errors and strips everything "UTF8::clean()" removes.
   *
   * @param string $str
   *
   * @return string The cleaned string, an empty string for empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // first: fix ISO <-> UTF-8 errors
    $fixed = self::fix_simple_utf8($str);

    // then, in one "clean()" call:
    // - remove all non UTF-8 symbols
    // - remove the replacement character (diamond question mark)
    // - remove invisible characters (e.g. "\0")
    // - remove the BOM
    // - normalize whitespace chars (but keep non-breaking-spaces)
    return (string)self::clean($fixed, true, true, false, true);
  }
2095
2096 3
  /**
   * Accepts a string or a array of strings and returns an array of Unicode code points.
   *
   * A string is split via "UTF8::split()" first, any other value is mapped as it is.
   *
   * @param    string|string[] $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool            $u_style If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    // every element is run through "UTF8::ord()"
    $points = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    // the results are additionally run through "UTF8::int_to_hex()"
    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $points);
  }
2131
2132
  /**
   * Counts how often each character occurs in a string.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array of Character as keys and
   *           their count as values.
   */
  public static function count_chars($str)
  {
    $chars = self::split($str);

    return array_count_values($chars);
  }
2144
2145
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity which is then converted
   * from "HTML-ENTITIES" to "UTF-8".
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    // hexadecimal numeric entity, e.g. 167 => "&#xa7;"
    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2162
2163 1
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The input is returned unchanged if it or $encoding is empty, if no encoding could be detected,
   * if $force is not true and the detected encoding already equals the requested one,
   * or if the generic conversion yields no usable result.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8),
   *                         otherwise we auto-detect the current string-encoding
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // nothing to do: unknown source encoding, or already in the target encoding and not forced
    if (
        !$encodingDetected
        ||
        (
            $force !== true
            &&
            $encodingDetected === $encoding
        )
    ) {
      return $str;
    }

    self::checkForSupport();

    // target UTF-8: use the repairing converter for the encodings it is meant for
    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // target ISO-8859-1: handled by "UTF8::to_win1252()"
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_win1252($str);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // strict check instead of truthiness: a converted "0" is a valid result and must not be thrown away
    if (is_string($strEncoded) && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
2237
2238
  /**
   * Callback function for preg_replace_callback use.
   *
   * Converts the complete match from "HTML-ENTITIES" to "UTF-8".
   *
   * @internal used for "UTF8::html_entity_decode()"
   *
   * @param  array $matches PREG matches
   *
   * @return string The converted match, or "&#x27;" if the conversion produced a single quote.
   */
  protected static function entityCallback($matches)
  {
    self::checkForSupport();

    $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    // a decoded single quote is handed back as a hexadecimal entity
    return $decoded === "'" ? '&#x27;' : $decoded;
  }
2259 2
2260
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      Name of the file to read.
   * @param int|bool|null $flags         [optional] Handed to the native function as its second argument
   *                                     ("use_include_path"): any non-empty value, e.g. FILE_USE_INCLUDE_PATH,
   *                                     turns the include path search on.
   * @param resource|null $context       [optional] A valid context resource created with stream_context_create.
   *                                     If it is null and $timeout is not 0, a context that only carries
   *                                     the "http" timeout is created.
   * @param int|null      $offset        [optional] The offset where the reading starts, null is treated as 0.
   * @param int|null      $maxlen        [optional] Maximum length of data read. Every value that is not an
   *                                     integer means: read until end of file is reached.
   * @param int           $timeout       Timeout for the "http" stream wrapper, only used if no $context is given.
   *
   * @param boolean       $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used
   *                                     non default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;

    // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, and it is deprecated as of PHP 8.1;
    //               it is kept here so that file names are handled exactly as before -- consider replacing it.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // the native function takes a bool and an int here, handing the null defaults through
    // is deprecated as of PHP 8.1, so they are normalized to the values PHP would coerce them to
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // the length argument is only passed if the caller really gave an integer
    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
2380
2381
  /**
   * Tells whether the content of a file begins with a BOM (Byte Order Mark).
   *
   * The whole file is read with the native "file_get_contents()" and the
   * content is handed to "UTF8::string_has_bom()".
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise.
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    return self::string_has_bom($content);
  }
2392
2393
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (every element / iterable property is
   * filtered in place). Strings get their "\r\n" and "\r" line endings replaced by "\n" and,
   * if they contain bytes >= 0x80, are normalized via "\Normalizer". Every other type is
   * returned untouched.
   *
   * @param mixed  $var                The value to filter.
   * @param int    $normalization_form Normalization form passed to "\Normalizer" (default: 4).
   * @param string $leading_combining  Prefix that is put in front of a string which starts with
   *                                   a combining mark (\p{Mn}).
   *
   * @return mixed The filtered value, same kind as the input.
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    if (is_array($var)) {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (is_object($var)) {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (!is_string($var)) {
      return $var;
    }

    // Workaround https://bugs.php.net/65732
    if (strpos($var, "\r") !== false) {
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure 7-bit strings need no normalization.
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: "already normalized" still allows the leading-combining check below.
      $normalized = '-';
    } else {
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // An empty / failed normalization falls back to self::encode()
      // (presumably the WINDOWS-1252 conversion mentioned above).
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars for NFC-safe concatenations.
    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
2445 1
2446
  /**
   * "filter_input()"-wrapper which normalizes the result to UTF-8 NFC, converting from
   * WINDOWS-1252 when needed.
   *
   * The options argument is only forwarded to the native function when the caller
   * actually passed a fourth argument.
   *
   * @param int    $type   Input type for the native "filter_input()".
   * @param string $var    Name of the variable to get.
   * @param int    $filter Filter to apply.
   * @param mixed  $option Options for the filter.
   *
   * @return mixed Result of the native call, passed through "UTF8::filter()".
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $hasOption = func_num_args() >= 4;

    if ($hasOption) {
      $var = filter_input($type, $var, $filter, $option);
    } else {
      $var = filter_input($type, $var, $filter);
    }

    return self::filter($var);
  }
2466
2467
  /**
   * "filter_input_array()"-wrapper which normalizes the result to UTF-8 NFC, converting from
   * WINDOWS-1252 when needed.
   *
   * When only "$type" is passed, the native function is called with that single argument;
   * otherwise "$definition" and "$add_empty" are forwarded as well.
   *
   * @param int   $type       Input type for the native "filter_input_array()".
   * @param mixed $definition Filter definition.
   * @param bool  $add_empty  Forwarded as third argument of the native function.
   *
   * @return mixed Result of the native call, passed through "UTF8::filter()".
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    $typeOnly = func_num_args() < 2;

    $result = $typeOnly
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($result);
  }
2486
2487
  /**
   * "filter_var()"-wrapper which normalizes the result to UTF-8 NFC, converting from
   * WINDOWS-1252 when needed.
   *
   * The options argument is only forwarded to the native function when the caller
   * actually passed a third argument.
   *
   * @param mixed $var    Value to filter.
   * @param int   $filter Filter to apply.
   * @param mixed $option Options for the filter.
   *
   * @return mixed Result of the native call, passed through "UTF8::filter()".
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    if (func_num_args() >= 3) {
      return self::filter(filter_var($var, $filter, $option));
    }

    return self::filter(filter_var($var, $filter));
  }
2506
2507 1
  /**
   * "filter_var_array()"-wrapper which normalizes the result to UTF-8 NFC, converting from
   * WINDOWS-1252 when needed.
   *
   * When only "$data" is passed, the native function is called with that single argument;
   * otherwise "$definition" and "$add_empty" are forwarded as well.
   *
   * @param array $data       Data to filter.
   * @param mixed $definition Filter definition.
   * @param bool  $add_empty  Forwarded as third argument of the native function.
   *
   * @return mixed Result of the native call, passed through "UTF8::filter()".
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    $argCount = func_num_args();

    if ($argCount >= 2) {
      $filtered = filter_var_array($data, $definition, $add_empty);
    } else {
      $filtered = filter_var_array($data);
    }

    return self::filter($filtered);
  }
2526
2527 1
  /**
   * Checks that a string does not contain more characters than the given limit.
   *
   * The length is measured with "UTF8::strlen()".
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The size in number of chars to be checked against string.
   *
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
2540 1
2541 1
  /**
   * Repairs a broken UTF-8 string.
   *
   * Every key of "self::$brokenUtf8ToUtf8" found in the input is replaced by its value.
   * The key / value lists are extracted once and cached in static variables.
   *
   * @param string $str The input; it is cast to string first.
   *
   * @return string The repaired string, or '' for empty input.
   */
  public static function fix_simple_utf8($str)
  {
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Build the lookup lists on first use only.
    if (null === $search) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2566
2567
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are handled element by element (recursively). For a single value,
   * "utf8_decode()" followed by "to_utf8()" is applied again and again until
   * the result no longer changes.
   *
   * @param array|string $str
   *
   * @return array|string The fixed value; an array if an array was given.
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    // Starting with '' means an empty string is returned without any conversion.
    $previous = '';
    while ($str !== $previous) {
      $previous = $str;

      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
2594
2595
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" support is flagged, its direction class is consulted first. When that
   * gives no decision (or is unavailable), the decimal code point of the character is
   * looked up in a fixed table of right-to-left code points.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    // Single RTL code points; these are matched by strict identity.
    static $rtlCodePoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // Inclusive RTL ranges (first, last).
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // Direction classes taken from the "IntlChar"-Class; LTR is tested first.
      if (in_array($direction, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($direction, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    if (in_array($c, $rtlCodePoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    // Everything outside the table is treated as left-to-right.
    return 'LTR';
  }
2713 1
2714 1
  /**
   * Loads a data set from "/data/*.php" (relative to the directory of this file).
   *
   * The file is pulled in with "require", so the returned value is whatever that
   * file returns.
   *
   * @param string $file Base name of the data file, without directory and extension.
   *
   * @return bool|string|array|int false if the file does not exist
   */
  protected static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2731
2732
  /**
   * Creates a random string of UTF-8 characters.
   *
   * The alphabet is built once and cached in static variables. Characters are picked with
   * "mt_rand()", so the result is not suitable for security-sensitive purposes.
   *
   * @param    int $len The length of string in characters; it is cast to int, so fractional
   *                    values are truncated.
   *
   * @return   string String consisting of random characters, '' if the length is not positive.
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // A non-integer length (e.g. 2.5) would never count down to exactly zero and loop
    // forever, therefore the length is normalized before anything else happens.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (!$chars) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // Keep digits and letters only; everything else becomes an empty string.
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // NOTE(review): array_filter() without a callback also drops the string "0", so
        // (assuming UTF8::chr() maps 48-79 to their ASCII characters) the digit zero is
        // never part of this alphabet. Left as-is to keep the generated alphabet unchanged.
        $chars = array_values(array_filter($chars));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for ($i = 0; $i < $len; ++$i) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
2779
2780
  /**
   * Converts hexadecimal U+xxxx code point representation to Integer.
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * Accepted are 4 to 6 characters, optionally prefixed with "\u" or "U+". The characters
   * must be hexadecimal digits (a leading "0x" inside those characters is tolerated, as
   * "intval()" with base 16 understands it); anything else counts as a failure.
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    if (!preg_match('/^(?:\\\u|U\+|)([a-z0-9]{4,6})$/i', $str, $match)) {
      return 0;
    }

    // The pattern above also lets non-hex letters through; "intval()" would then silently
    // convert only the leading hex digits (e.g. "12xy" => 0x12) instead of failing.
    if (!preg_match('/^(?:0x)?[0-9a-f]+$/i', $match[1])) {
      return 0;
    }

    return intval($match[1], 16);
  }
2797
2798 15
  /**
   * Alias for "UTF8::html_entity_decode()"; all arguments are forwarded unchanged.
   *
   * @param string $str      The input string.
   * @param int    $flags    Flags for the decoding (null lets the target method decide).
   * @param string $encoding Encoding to use.
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2811 15
2812
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * e.g.: &#123;&#39;&#1740;
   *
   * When "mb_encode_numericentity()" is available it does the work; otherwise the string is
   * split and every character is encoded via "UTF8::single_chr_html_encode()".
   *
   * @param  string $str            The Unicode string to be encoded as numbered entities.
   * @param  bool   $keepAsciiChars Keep ASCII chars.
   * @param  string $encoding       Encoding handed to "mb_encode_numericentity()".
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // First code point to encode: everything, or only non-ASCII.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // convmap = (first, last, offset, mask). The range ends at U+10FFFF so characters
      // outside the Basic Multilingual Plane (e.g. emoji) are encoded as well; the mask
      // keeps all 21 bits a Unicode code point can occupy.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff,),
          $encoding
      );
    }

    // Fallback without mbstring: encode character by character.
    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return self::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
2849
2850
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so missing semicolons are added to numeric entities first.
   *
   * Converts all HTML entities to their applicable characters. Decoding is repeated
   * until the string no longer changes, so nested encodings are resolved as well.
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      The input string.
   * @param int    $flags    [optional] A bitmask of one or more of the following flags, which specify
   *                         how to handle quotes and which document type to use. If null is given,
   *                         ENT_COMPAT | ENT_HTML5 is used when Bootup::is_php('5.4') is true,
   *                         ENT_COMPAT otherwise.
   *                         - ENT_COMPAT:   Will convert double-quotes and leave single-quotes alone.
   *                         - ENT_QUOTES:   Will convert both double and single quotes.
   *                         - ENT_NOQUOTES: Will leave both double and single quotes unconverted.
   *                         - ENT_HTML401:  Handle code as HTML 4.01.
   *                         - ENT_XML1:     Handle code as XML 1.
   *                         - ENT_XHTML:    Handle code as XHTML.
   *                         - ENT_HTML5:    Handle code as HTML 5.
   * @param string $encoding [optional] Encoding to use.
   *
   * @return string the decoded string.
   */
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Without an ampersand there cannot be any entity.
    if (false === strpos($str, '&')) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", array('\voku\helper\UTF8', 'entityCallback'), $str);

      // Append the missing semicolon to numeric / hexadecimal entities ...
      $completed = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      // ... and decode numeric & UTF16 two byte entities.
      $str = html_entity_decode($completed, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2957
2958
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * The encoding is passed through "UTF8::normalizeEncoding()" first; everything is then
   * forwarded to the native "htmlentities()".
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           The input string.
   * @param int    $flags         [optional] A bitmask of one or more of the following flags, which specify
   *                              how to handle quotes, invalid code unit sequences and the used document
   *                              type. This wrapper defaults to ENT_COMPAT.
   *                              - ENT_COMPAT:     Will convert double-quotes and leave single-quotes alone.
   *                              - ENT_QUOTES:     Will convert both double and single quotes.
   *                              - ENT_NOQUOTES:   Will leave both double and single quotes unconverted.
   *                              - ENT_IGNORE:     Silently discard invalid code unit sequences instead of
   *                                                returning an empty string. Using this flag is discouraged
   *                                                as it may have security implications.
   *                              - ENT_SUBSTITUTE: Replace invalid code unit sequences with a Unicode
   *                                                Replacement Character U+FFFD (UTF-8) or &#xFFFD;
   *                                                (otherwise) instead of returning an empty string.
   *                              - ENT_DISALLOWED: Replace invalid code points for the given document type
   *                                                with a Unicode Replacement Character U+FFFD (UTF-8) or
   *                                                &#xFFFD; (otherwise) instead of leaving them as is. This
   *                                                may be useful, for instance, to ensure the well-formedness
   *                                                of XML documents with embedded external content.
   *                              - ENT_HTML401:    Handle code as HTML 4.01.
   *                              - ENT_XML1:       Handle code as XML 1.
   *                              - ENT_XHTML:      Handle code as XHTML.
   *                              - ENT_HTML5:      Handle code as HTML 5.
   * @param string $encoding      [optional] Like htmlspecialchars(), htmlentities() takes an optional third
   *                              argument which defines the encoding used in conversion. Although this
   *                              argument is technically optional, you are highly encouraged to specify
   *                              the correct value for your code.
   * @param bool   $double_encode [optional] When turned off PHP will not encode existing html entities.
   *                              The default is to convert everything.
   *
   * @return string the encoded string. If the input string contains an invalid code unit sequence
   *                within the given encoding an empty string will be returned, unless either the
   *                ENT_IGNORE or ENT_SUBSTITUTE flags are set.
   */
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $normalizedEncoding = self::normalizeEncoding($encoding);

    return htmlentities($str, $flags, $normalizedEncoding, $double_encode);
  }
3067
3068
  /**
3069
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
3070
   *
3071
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3072 1
   *
3073
   * @param string $str           <p>
3074 1
   *                              The string being converted.
3075
   *                              </p>
3076
   * @param int    $flags         [optional] <p>
3077
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3078
   *                              invalid code unit sequences and the used document type. The default is
3079
   *                              ENT_COMPAT | ENT_HTML401.
3080
   *                              <table>
3081
   *                              Available <i>flags</i> constants
3082
   *                              <tr valign="top">
3083
   *                              <td>Constant Name</td>
3084
   *                              <td>Description</td>
3085
   *                              </tr>
3086
   *                              <tr valign="top">
3087
   *                              <td><b>ENT_COMPAT</b></td>
3088
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3089
   *                              </tr>
3090
   *                              <tr valign="top">
3091
   *                              <td><b>ENT_QUOTES</b></td>
3092
   *                              <td>Will convert both double and single quotes.</td>
3093
   *                              </tr>
3094
   *                              <tr valign="top">
3095
   *                              <td><b>ENT_NOQUOTES</b></td>
3096
   *                              <td>Will leave both double and single quotes unconverted.</td>
3097
   *                              </tr>
3098
   *                              <tr valign="top">
3099
   *                              <td><b>ENT_IGNORE</b></td>
3100
   *                              <td>
3101
   *                              Silently discard invalid code unit sequences instead of returning
3102
   *                              an empty string. Using this flag is discouraged as it
3103 1
   *                              may have security implications.
3104
   *                              </td>
3105 1
   *                              </tr>
3106
   *                              <tr valign="top">
3107
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3108
   *                              <td>
3109
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3110
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
3111
   *                              </td>
3112
   *                              </tr>
3113
   *                              <tr valign="top">
3114
   *                              <td><b>ENT_DISALLOWED</b></td>
3115 1
   *                              <td>
3116
   *                              Replace invalid code points for the given document type with a
3117 1
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3118
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3119
   *                              instance, to ensure the well-formedness of XML documents with
3120
   *                              embedded external content.
3121
   *                              </td>
3122
   *                              </tr>
3123
   *                              <tr valign="top">
3124
   *                              <td><b>ENT_HTML401</b></td>
3125
   *                              <td>
3126
   *                              Handle code as HTML 4.01.
3127 1
   *                              </td>
3128
   *                              </tr>
3129 1
   *                              <tr valign="top">
3130
   *                              <td><b>ENT_XML1</b></td>
3131
   *                              <td>
3132
   *                              Handle code as XML 1.
3133
   *                              </td>
3134
   *                              </tr>
3135
   *                              <tr valign="top">
3136
   *                              <td><b>ENT_XHTML</b></td>
3137
   *                              <td>
3138
   *                              Handle code as XHTML.
3139
   *                              </td>
3140
   *                              </tr>
3141
   *                              <tr valign="top">
3142
   *                              <td><b>ENT_HTML5</b></td>
3143
   *                              <td>
3144
   *                              Handle code as HTML 5.
3145
   *                              </td>
3146
   *                              </tr>
3147
   *                              </table>
3148
   *                              </p>
3149
   * @param string $encoding      [optional] <p>
3150
   *                              Defines encoding used in conversion.
3151
   *                              </p>
3152
   *                              <p>
3153
   *                              For the purposes of this function, the encodings
3154
   *                              ISO-8859-1, ISO-8859-15,
3155
   *                              UTF-8, cp866,
3156
   *                              cp1251, cp1252, and
3157
   *                              KOI8-R are effectively equivalent, provided the
3158
   *                              <i>string</i> itself is valid for the encoding, as
3159
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3160
   *                              the same positions in all of these encodings.
3161
   *                              </p>
3162
   * @param bool   $double_encode [optional] <p>
3163
   *                              When <i>double_encode</i> is turned off PHP will not
3164
   *                              encode existing html entities, the default is to convert everything.
3165
   *                              </p>
3166
   *
3167
   * @return string The converted string.
3168
   * </p>
3169
   * <p>
3170
   * If the input <i>string</i> contains an invalid code unit
3171
   * sequence within the given <i>encoding</i> an empty string
3172
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3173
   * <b>ENT_SUBSTITUTE</b> flags are set.
3174
   */
3175
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // the encoding label is passed through self::normalizeEncoding() before the native call
    return htmlspecialchars($str, $flags, self::normalizeEncoding($encoding), $double_encode);
  }
3181 16
3182
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean, no ternary needed
    return extension_loaded('iconv');
  }
3191
3192 4
  /**
   * Converts an integer to its hexadecimal code point notation, e.g. 65 => "U+0041".
   *
   * The hex digits are left-padded with zeros to at least four characters;
   * letters are emitted in lower case (as produced by dechex()).
   *
   * @param    int    $int  The integer to be converted to hexadecimal code point.
   * @param    string $pfix Prefix placed in front of the hex digits.
   *
   * @return   string The code point, or empty string on failure.
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // only input made purely of decimal digits is accepted (no sign, no fraction)
    if (!ctype_digit((string)$int)) {
      return '';
    }

    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
3212 1
3213 1
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean, no ternary needed
    return extension_loaded('intl');
  }
3222
3223
  /**
   * checks whether intl-char is available on the server
   *
   * True only when Bootup::is_php('7.0') is strictly true and the "IntlChar" class exists.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intlChar_loaded()
  {
    // "&&" instead of the low-precedence "and" keyword; the class lookup
    // is only evaluated when the version check passes
    return Bootup::is_php('7.0') === true && class_exists('IntlChar');
  }
3232 4
3233
  /**
   * Camel-case alias that forwards to "UTF8::is_ascii()".
   *
   * @param string $str The string to check.
   *
   * @return bool Whatever "UTF8::is_ascii()" returns for $str.
   */
  public static function isAscii($str)
  {
    return self::is_ascii($str);
  }
3244
3245
  /**
   * Camel-case alias that forwards to "UTF8::is_base64()".
   *
   * @param string $str The string to check.
   *
   * @return bool Whatever "UTF8::is_base64()" returns for $str.
   */
  public static function isBase64($str)
  {
    return self::is_base64($str);
  }
3256
3257
  /**
   * Camel-case alias that forwards to "UTF8::is_bom()".
   *
   * @param string $utf8_chr The string to compare against the known byte order marks.
   *
   * @return bool Whatever "UTF8::is_bom()" returns for $utf8_chr.
   */
  public static function isBom($utf8_chr)
  {
    return self::is_bom($utf8_chr);
  }
3268
3269
  /**
   * Try to check if a string is a json-string...
   *
   * Only JSON documents whose top level is an object or an array count as JSON here;
   * bare scalars such as "123" or "\"text\"" are rejected, as is the empty string.
   *
   * @param string $str The string to check (it is cast to string first).
   *
   * @return bool True if $str decodes without error to an object or an array.
   */
  public static function isJson($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = json_decode($str);

    // a top-level JSON array ("[1,2]") decodes to a PHP array, not an object,
    // so both container types have to be accepted
    return (is_object($decoded) || is_array($decoded))
           &&
           json_last_error() === JSON_ERROR_NONE;
  }
3294 2
3295 2
  /**
   * Checks whether the string contains at least one HTML-like tag (e.g. "<b>", "</p>", "<br/>").
   *
   * @param string $str The string to check (it is cast to string first).
   *
   * @return bool True if a tag pattern was found, false otherwise (also for an empty string).
   */
  public static function isHtml($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $found = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str, $found);

    // $found stays empty when nothing matched
    return count($found) > 0;
  }
3321 1
3322 1
  /**
   * Camel-case alias that forwards to "UTF8::is_utf8()".
   *
   * @param string $str The string to check.
   *
   * @return bool Whatever "UTF8::is_utf8()" returns for $str.
   */
  public static function isUtf8($str)
  {
    return self::is_utf8($str);
  }
3333
3334
  /**
   * Checks if a string is 7 bit ASCII, i.e. contains no byte in the range 0x80-0xFF.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if it is ASCII<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    // a single byte with the high bit set is enough to rule out ASCII
    $hasHighBitByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighBitByte;
  }
3346 2
3347 2
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The check is a round trip: the input is strictly decoded and re-encoded,
   * and must come back byte-for-byte identical. An empty string is never base64.
   *
   * @param string $str The string to check (it is cast to string first).
   *
   * @return bool Whether or not $str is base64 encoded
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = base64_decode($str, true);

    return base64_encode($decoded) === $str;
  }
3368 1
3369
  /**
   * Check if the input is binary... (is look like a hack)
   *
   * The input is reported as binary when either
   *  - it consists solely of the digits "0" and "1" (a bit-string), or
   *  - it contains at least one NUL byte.
   *
   * A former third test counted the literal four-byte substring '^ -~' and required
   * more than 30% of the length to be such matches; non-overlapping four-byte matches
   * can cover at most 25% of the length, so that test could never succeed and has been removed.
   *
   * @param string $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    // a string of only "0"/"1" digits is treated as a binary representation
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // any NUL byte marks the input as binary
    return substr_count($input, "\x00") > 0;
  }
3393
3394
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and hands them to self::is_binary().
   * If the file cannot be opened or read, an empty string is checked instead.
   *
   * @param string $file Path of the file to inspect.
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing,
      // so the handle must be checked before it is read from or closed
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        fclose($fp);

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // reached only if warnings are converted into exceptions by an error handler
      $block = '';
    }

    return self::is_binary($block);
  }
3413
3414
  /**
   * Checks if the given string is an "Byte Order Mark".
   *
   * The string must be exactly equal to one of the keys of self::$bom.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if $str is a Byte Order Mark, False otherwise.
   */
  public static function is_bom($str)
  {
    // strict comparison against every key of the BOM table
    return in_array($str, array_keys(self::$bom), true);
  }
3433 28
3434 28
  /**
   * Check if the string is UTF-16.
   *
   * Only input that self::is_binary() reports as binary is examined. For each byte
   * order the string is converted to UTF-8 and must survive a round trip through the
   * candidate encoding unchanged; it is then scored by counting the keys of
   * self::count_chars() of the converted text that are found (strictly) in
   * self::count_chars() of the input. The byte order with the higher score wins,
   * equal scores mean "not UTF-16".
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // self::count_chars($str) does not depend on the byte order: compute it at most once
    $strChars = null;
    $scores = array();

    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $scores[$encoding] = 0;

      $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($test === false || strlen($test) <= 1) {
        continue;
      }

      // the UTF-8 form has to survive a round trip through the candidate encoding
      $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
      $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
      if ($test3 !== $test) {
        continue;
      }

      if ($strChars === null) {
        $strChars = self::count_chars($str);
      }

      foreach (self::count_chars($test3) as $test3char => $test3charEmpty) {
        if (in_array($test3char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    // a tie gives no evidence for either byte order
    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
3488
3489
  /**
   * Check if the string is UTF-32.
   *
   * Only input that self::is_binary() reports as binary is examined. For each byte
   * order the string is converted to UTF-8 and must survive a round trip through the
   * candidate encoding unchanged; it is then scored by counting the keys of
   * self::count_chars() of the converted text that are found (strictly) in
   * self::count_chars() of the input. The byte order with the higher score wins,
   * equal scores mean "not UTF-32".
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // self::count_chars($str) does not depend on the byte order: compute it at most once
    $strChars = null;
    $scores = array();

    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $scores[$encoding] = 0;

      $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($test === false || strlen($test) <= 1) {
        continue;
      }

      // the UTF-8 form has to survive a round trip through the candidate encoding
      $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
      $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
      if ($test3 !== $test) {
        continue;
      }

      if ($strChars === null) {
        $strChars = self::count_chars($str);
      }

      foreach (self::count_chars($test3) as $test3char => $test3charEmpty) {
        if (in_array($test3char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    // a tie gives no evidence for either byte order
    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
3543
3544
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * An empty string is considered valid. Overlong (non-shortest) forms, 5 and 6 octet
   * sequences, surrogates, code points above 0x10FFFF and sequences that are cut off
   * (including at the very end of the string) are rejected.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param    string $str The string to be checked.
   *
   * @return   bool
   */
  public static function is_utf8($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    // NOTE(review): this /u based shortcut is taken when pcre_utf8_support() is NOT true,
    // which looks inverted -- confirm against pcre_utf8_support() before changing it.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);

    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }

        continue;
      }

      // When mState is non-zero, we expect a continuation of the multi-octet
      // sequence
      if (0x80 !== (0xC0 & $in)) {
        /**
         *((0xC0 & (*in) != 0x80) && (mState != 0))
         * Incomplete multi-octet sequence.
         */
        return false;
      }

      // Legal continuation.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      /**
       * End of the multi-octet sequence. mUcs4 now contains the final
       * Unicode code point to be output
       */
      if (0 === --$mState) {
        /*
        * Check for illegal sequences and code points.
        */
        // From Unicode 3.1, non-shortest form is illegal
        if (
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // initialize UTF8 cache
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state here means the string ended in the middle of a
    // multi-octet sequence, which is just as incomplete as a broken continuation.
    return $mState === 0;
  }
3677
3678
  /**
3679 1
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3680
   * Decodes a JSON string
3681 1
   *
3682 1
   * @link http://php.net/manual/en/function.json-decode.php
3683 1
   *
3684
   * @param string $json    <p>
3685 1
   *                        The <i>json</i> string being decoded.
3686
   *                        </p>
3687
   *                        <p>
3688
   *                        This function only works with UTF-8 encoded strings.
3689
   *                        </p>
3690
   *                        <p>PHP implements a superset of
3691
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3692
   *                        only supports these values when they are nested inside an array or an object.
3693
   *                        </p>
3694 2
   * @param bool   $assoc   [optional] <p>
3695
   *                        When <b>TRUE</b>, returned objects will be converted into
3696 2
   *                        associative arrays.
3697
   *                        </p>
3698 2
   * @param int    $depth   [optional] <p>
3699 2
   *                        User specified recursion depth.
3700 2
   *                        </p>
3701
   * @param int    $options [optional] <p>
3702 2
   *                        Bitmask of JSON decode options. Currently only
3703
   *                        <b>JSON_BIGINT_AS_STRING</b>
3704
   *                        is supported (default is to cast large integers as floats)
3705
   *                        </p>
3706
   *
3707
   * @return mixed the value encoded in <i>json</i> in appropriate
3708
   * PHP type. Values true, false and
3709
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3710
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3711
   * <i>json</i> cannot be decoded or if the encoded
3712 1
   * data is deeper than the recursion limit.
3713
   */
3714 1
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    // the 4th argument ($options) is only passed on when Bootup::is_php('5.4') is strictly true
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3726
3727
  /**
3728 13
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3729
   * Returns the JSON representation of a value
3730 13
   *
3731
   * @link http://php.net/manual/en/function.json-encode.php
3732 13
   *
3733
   * @param mixed $value   <p>
3734
   *                       The <i>value</i> being encoded. Can be any type except
3735 13
   *                       a resource.
3736 13
   *                       </p>
3737 13
   *                       <p>
3738 13
   *                       All string data must be UTF-8 encoded.
3739 13
   *                       </p>
3740 13
   *                       <p>PHP implements a superset of
3741 13
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3742 13
   *                       only supports these values when they are nested inside an array or an object.
3743 13
   *                       </p>
3744 13
   * @param int   $options [optional] <p>
3745 13
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3746 13
   *                       <b>JSON_HEX_TAG</b>,
3747 13
   *                       <b>JSON_HEX_AMP</b>,
3748 13
   *                       <b>JSON_HEX_APOS</b>,
3749
   *                       <b>JSON_NUMERIC_CHECK</b>,
3750 13
   *                       <b>JSON_PRETTY_PRINT</b>,
3751 2
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3752
   *                       <b>JSON_FORCE_OBJECT</b>,
3753
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3754 13
   *                       constants is described on
3755
   *                       the JSON constants page.
3756
   *                       </p>
3757
   * @param int   $depth   [optional] <p>
3758
   *                       Set the maximum depth. Must be greater than zero.
3759
   *                       </p>
3760
   *
3761
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3762
   */
3763
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // the 3rd argument ($depth) is only passed on when Bootup::is_php('5.5') is truthy
    return Bootup::is_php('5.5')
        ? json_encode($filtered, $options, $depth)
        : json_encode($filtered, $options);
  }
3775
3776
  /**
   * Lower-cases the first character of a string and leaves the rest untouched.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function lcfirst($str)
  {
    $head = self::substr($str, 0, 1);
    $tail = self::substr($str, 1);

    return self::strtolower($head) . $tail;
  }
3787
3788 8
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than "ltrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when left at its default (INF)
   *                         the regex class "\s" is used, otherwise the result of self::rxClass($chars)
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if (INF === $chars) {
      $pattern = '\s';
    } else {
      $pattern = self::rxClass($chars);
    }

    return preg_replace("/^{$pattern}+/u", '', $str);
  }
3810 8
3811 8
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array argument is joined with implode() before it is examined.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point.
   */
  public static function max($arg)
  {
    $subject = is_array($arg) ? implode($arg) : $arg;

    $codepoints = self::codepoints($subject);

    return self::chr(max($codepoints));
  }
3826 1
3827
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest per-character byte length, or 0 if no lengths were found.
   */
  public static function max_chr_width($str)
  {
    $byteLengths = self::chr_size_list($str);

    return count($byteLengths) > 0 ? (int)max($byteLengths) : 0;
  }
3844 1
3845
  /**
   * Checks whether the mbstring extension is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal
   * encoding is switched to UTF-8.
   *
   * @return   bool True if available, false otherwise
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3860
3861 14
  /**
   * Returns the UTF-8 character with the lowest code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *                      (an array is concatenated before it is inspected).
   *
   * @return   string The character with the lowest code point.
   */
  public static function min($arg)
  {
    $str = is_array($arg) ? implode($arg) : $arg;
    $codePoints = self::codepoints($str);

    return self::chr(min($codePoints));
  }
3876 10
3877
  /**
   * Normalize the encoding-name input.
   *
   * Empty values, "UTF-8" and every name listed in self::$iconvEncoding are
   * returned untouched. Everything else is upper-cased and, if its
   * alphanumeric form is a known alias, mapped to the canonical name.
   * Results are memoized per input value.
   *
   * @param string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.
   */
  public static function normalizeEncoding($encoding)
  {
    static $normalizedCache = array();

    if (
        !$encoding
        ||
        'UTF-8' === $encoding
        ||
        in_array($encoding, self::$iconvEncoding, true)
    ) {
      return $encoding;
    }

    if (isset($normalizedCache[$encoding])) {
      return $normalizedCache[$encoding];
    }

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $upperCased = strtoupper($encoding);

    // the alias table is keyed without punctuation ("ISO-8859-1" -> "ISO88591")
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    $normalized = empty($aliases[$aliasKey]) ? $upperCased : $aliases[$aliasKey];

    $normalizedCache[$encoding] = $normalized;

    return $normalized;
  }
3933
3934 1
  /**
   * Normalize MS Word special characters.
   *
   * Every key of self::$utf8MSWord found in the string is replaced by the
   * corresponding value of that table.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search/replace lists are derived from the table once and then reused
    static $replacementTable = null;

    if ($replacementTable === null) {
      $replacementTable = array(
          'search'  => array_keys(self::$utf8MSWord),
          'replace' => array_values(self::$utf8MSWord),
      );
    }

    return str_replace($replacementTable['search'], $replacementTable['replace'], $str);
  }
3953
3954
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable is replaced by a plain space and,
   * unless requested otherwise, every entry of self::$bidiUniCodeControlsTable
   * is removed.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaceCache = array();
    static $bidiControls = null;

    // one cached replacement list per variant (with / without the no-break space)
    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceCache[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceCache[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiControls, '', $str);
    }

    return str_replace($whitespaceCache[$cacheKey], ' ', $str);
  }
3992
3993
  /**
   * Format a number with grouped thousands.
   *
   * Works like the native "number_format()", but also accepts separators
   * that are longer than one byte (e.g. multi-byte UTF-8 characters) on
   * every PHP version.
   *
   * @param float  $number        The number being formatted.
   * @param int    $decimals      Number of decimal digits.
   * @param string $dec_point     Separator for the decimal point.
   * @param string $thousands_sep Thousands separator.
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // If EITHER separator is longer than one byte, format with the plain
    // ASCII separators first and swap them afterwards.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() replaces both separators in a single pass, so a "," or "."
      // contained in one of the new separators is never replaced a second time.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
4028 1
4029
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * Only the first (up to) four bytes of the input are inspected.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for an empty input.
   */
  public static function ord($s)
  {
    // "0" is falsy in PHP but still a valid character
    if ($s !== '0' && !$s) {
      return 0;
    }

    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $codePoint = \IntlChar::ord($s);
      if ($codePoint) {
        return $codePoint;
      }
    }

    // 1-indexed list of the byte values
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // four byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte: the byte value itself
    return $lead;
  }
4070 36
4071
  /**
   * Parses the string into variables.
   *
   * WARNING: This differs from parse_str() by returning the results
   *    in the second parameter instead of placing them in the local scope!
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string; it is passed through self::filter() before parsing.
   *                        </p>
   * @param array  $result  <p>
   *                        The parsed variables are stored in this variable as array elements.
   *                        </p>
   *
   * @return void
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    \mb_parse_str(self::filter($str), $result);
  }
4098
4099
  /**
   * Checks if the "u" modifier, which enables Unicode support in PCRE, is available.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // an unsupported modifier makes preg_match() fail (with a warning we silence)
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matchResult = @preg_match('//u', '');

    return (bool)$matchResult;
  }
4109
4110 23
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point in this order: a string of
   * decimal digits is taken as the number itself, a string of hexadecimal
   * digits is converted via self::hex_to_int(), anything else is treated as
   * a character and resolved via self::ord().
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters of the range, or an empty array if a boundary is empty / resolves to 0.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $chars = array();
    foreach (range($bounds[0], $bounds[1]) as $codePoint) {
      $chars[] = self::chr($codePoint);
    }

    return $chars;
  }
4156
4157
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$bom (BOM byte string => byte length) is tested in
   * turn against the beginning of the - possibly already shortened - string.
   *
   * @param string $str
   *
   * @return string
   */
  public static function removeBOM($str = '')
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (strpos($str, $bomString) !== 0) {
        continue;
      }

      $str = substr($str, $bomByteLength);
    }

    return $str;
  }
4174
4175
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of an item (e.g. "   " for
   * the item " ") is collapsed into a single occurrence.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String (or list of strings) to search for in the base string;
   *                              any other type leaves the base string untouched
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        // The capture group always holds exactly one copy of the item, so
        // "$1" re-inserts it literally. Passing $item itself as replacement
        // would let "$0", "\1", "\\" ... in it be read as backreferences.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4197
4198
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * Removed are all control characters except newline (dec 10), carriage
   * return (dec 13) and horizontal tab (dec 09) and - if requested - their
   * URL-encoded forms in upper- or lowercase hex notation.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the URL-encoded ("%0B") forms.
   * @param  string $replacement Text that is inserted in place of each removed sequence.
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left to strip: removing one sequence can
    // join its neighbours into a new one (e.g. "%%0b0b")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4232
4233
  /**
   * Replace the diamond question mark (�), i.e. the Unicode replacement character.
   *
   * @param string $str
   * @param string $unknown The text to insert instead, "?" by default.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    $diamonds = array(
        "\xEF\xBF\xBD",
        '�',
    );

    return str_replace($diamonds, $unknown, $str);
  }
4255
4256 24
  /**
   * Strip whitespace (or other characters) from the end of a UTF-8 string.
   *
   * WARNING: This is much slower than the native "rtrim()".
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; whitespace ("\s") is stripped by default
   *
   * @return   string The string with the unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INF is the "no characters given" marker -> fall back to the whitespace class
    $chars = INF === $chars ? '\s' : self::rxClass($chars);

    // "\z" only matches at the very end of the subject. A plain "$" also
    // matches right before a trailing newline, which would strip the given
    // characters in front of that newline (e.g. the "c" of "abc\n").
    return preg_replace('/' . $chars . '+\z/u', '', $str);
  }
4278
4279
  /**
   * Build a regular-expression fragment that matches any one of the given characters.
   *
   * Elements returned by self::str_split() that are longer than two bytes
   * and longer than one character cannot live inside a bracket expression;
   * they are added as alternatives next to it: (?:[...]|x|y).
   *
   * @param string $s     The characters to match.
   * @param string $class Content that is placed inside the bracket expression in any case.
   *
   * @return string The bracket expression, or a non-capturing group of alternatives.
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    $bracketContent = $class;
    $alternatives = array();

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen is only literal at the edge of a bracket expression
        $bracketContent = '-' . $bracketContent;
      } elseif (!isset($char[2])) {
        $bracketContent .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $bracketContent .= $char;
      } else {
        $alternatives[] = $char;
      }
    }

    $bracketExpression = '[' . $bracketContent . ']';

    if (count($alternatives) === 0) {
      $return = $bracketExpression;
    } else {
      $return = '(?:' . $bracketExpression . '|' . implode('|', $alternatives) . ')';
    }

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
4324
4325
  /**
   * Echo native UTF8-Support libs, e.g. for debugging.
   *
   * Prints one "name: value" line per entry of self::$support, so that
   * boolean flags stay readable ("true" / "false" instead of "1" / "").
   *
   * @return void
   */
  public static function showSupport()
  {
    // init (same call the other methods of this class make before they read self::$support)
    self::checkForSupport();

    foreach (self::$support as $feature => $isSupported) {
      echo $feature . ': ' . var_export($isSupported, true) . "\n<br>";
    }
  }
4334
4335
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr            The Unicode character to be encoded as numbered entity.
   * @param    bool   $keepAsciiChars Set to true to return ASCII characters unchanged.
   *
   * @return   string The HTML numbered entity, or an empty string for an empty input.
   */
  public static function single_chr_html_encode($chr, $keepAsciiChars = false)
  {
    // "0" is falsy in PHP but still a character that has to be encoded
    if (!$chr && $chr !== '0') {
      return '';
    }

    if ($keepAsciiChars === true && self::isAscii($chr) === true) {
      return $chr;
    }

    return '&#' . self::ord($chr) . ';';
  }
4357
4358 13
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *                              (only applied when PCRE supports UTF-8).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: walk the bytes and collect well-formed sequences, bytes
      // that do not belong to such a sequence are skipped

      $byteCount = strlen($str);

      for ($i = 0; $i < $byteCount; $i++) {
        $lead = $str[$i];

        // single byte (0xxxxxxx)
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;
          continue;
        }

        // the lead byte announces the width of the sequence
        if (($lead & "\xE0") === "\xC0") {
          $width = 2;
        } elseif (($lead & "\xF0") === "\xE0") {
          $width = 3;
        } elseif (($lead & "\xF8") === "\xF0") {
          $width = 4;
        } else {
          continue;
        }

        // sequence is cut off by the end of the string
        if (!isset($str[$i + $width - 1])) {
          continue;
        }

        // every following byte has to be a continuation byte (10xxxxxx)
        $sequence = $lead;
        for ($offset = 1; $offset < $width; $offset++) {
          if (($str[$i + $offset] & "\xC0") !== "\x80") {
            continue 2;
          }

          $sequence .= $str[$i + $offset];
        }

        $chars[] = $sequence;
        $i += $width - 1;
      }
    }

    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4436
4437 2
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run from specific to generic and the first hit wins:
   * UTF-16 / UTF-32 (binary strings only), ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally an "iconv()" round-trip for every
   * entry of self::$iconvEncoding.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each check is executed only once and its result is reused
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
        'UTF-8',
    );

    self::checkForSupport();

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // an encoding is accepted if converting the string from that encoding
    // into the very same encoding hands back the identical bytes

    $original = (string)$str;
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @iconv($encodingTmp, $encodingTmp, $str);
      if ($roundTrip !== false && $roundTrip === $original) {
        return $encodingTmp;
      }
    }

    return false;
  }
4514 12
4515
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value(s) being searched for. Every replacement
   *                       with a search array is performed on the result of
   *                       the previous replacement. An empty search string
   *                       never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s); they are inserted
   *                       literally, "$1" or "\1" have no special meaning.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed a string or an array of replacements.
   * @since 5.0
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // turn every needle into a case-insensitive UTF-8 pattern
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // pattern that can never match, an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // Escape "\" and "$", otherwise preg_replace() would read sequences
    // like "$1" or "\0" in the replacement text as backreferences.
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    return preg_replace($patterns, $replacement, $subject, -1, $count);
  }
4559
4560
  /**
   * Limit the number of characters in a string without cutting a word in half.
   *
   * A string that is longer than the limit is cut at the last space within
   * the limit and the add-on is appended. If the limited part contains no
   * space at all, it is cut hard one character before the limit.
   *
   * @param  string $str
   * @param  int    $length   Maximum number of characters.
   * @param  string $strAddOn Text that is appended to a shortened string.
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough, nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls exactly behind a word
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // drop the (possibly cut) last word of the limited part
    $limited = self::substr($str, 0, $length);
    $words = explode(' ', $limited);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord === '') {
      return self::substr($limited, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4600 1
4601 1
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if the pad length is not an integer,
   * is not greater than the length of the input or if the pad string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length <= $input_length) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // nothing to pad with (and the length is used as divisor below)
    if (!$ps_length) {
      return $input;
    }

    // number of characters that have to be added, always > 0 here
    $diff = $pad_length - $input_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // an odd number of characters puts the extra one on the right side
        $leftLength = (int)floor($diff / 2);
        $rightLength = $diff - $leftLength;

        // long enough for either side
        $repeated = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));

        $pre = $leftLength > 0 ? self::substr($repeated, 0, $leftLength) : '';
        $post = self::substr($repeated, 0, $rightLength);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $input . $post;
  }
4646
4647
  /**
4648
   * Repeat a string.
4649
   *
4650
   * @param string $input      <p>
4651
   *                           The string to be repeated.
4652
   *                           </p>
4653
   * @param int    $multiplier <p>
4654 8
   *                           Number of time the input string should be
4655
   *                           repeated.
4656 8
   *                           </p>
4657
   *                           <p>
4658 8
   *                           multiplier has to be greater than or equal to 0.
4659
   *                           If the multiplier is set to 0, the function
4660 8
   *                           will return an empty string.
4661 2
   *                           </p>
4662
   *
4663
   * @return string the repeated string.
4664 7
   */
4665
  public static function str_repeat($input, $multiplier)
  {
    // run the input through self::filter() first, then delegate to the native str_repeat()
    return str_repeat(self::filter($input), $multiplier);
  }
4671
4672 7
  /**
4673 6
   * INFO: this is only a wrapper for "str_replace()"  -> the original function is already UTF-8 safe
4674
   *
4675
   * (PHP 4, PHP 5)<br/>
4676 4
   * Replace all occurrences of the search string with the replacement string
4677
   *
4678
   * @link http://php.net/manual/en/function.str-replace.php
4679 4
   *
4680 4
   * @param mixed $search  <p>
4681 4
   *                       The value being searched for, otherwise known as the needle.
4682
   *                       An array may be used to designate multiple needles.
4683 4
   *                       </p>
4684 3
   * @param mixed $replace <p>
4685
   *                       The replacement value that replaces found search
4686 3
   *                       values. An array may be used to designate multiple replacements.
4687 3
   *                       </p>
4688 3
   * @param mixed $subject <p>
4689
   *                       The string or array being searched and replaced on,
4690 3
   *                       otherwise known as the haystack.
4691 1
   *                       </p>
4692
   *                       <p>
4693 1
   *                       If subject is an array, then the search and
4694 1
   *                       replace is performed with every entry of
4695 1
   *                       subject, and the return value is an array as
4696
   *                       well.
4697 1
   *                       </p>
4698
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4699
   *
4700
   * @return mixed This function returns a string or an array with the replaced values.
4701
   */
4702
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation; the native function fills $count by reference
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4706
4707
  /**
4708
   * Shuffles all the characters in the string.
4709
   *
4710
   * @param    string $str The input string
4711
   *
4712 1
   * @return   string The shuffled string.
4713 3
   */
4714
  public static function str_shuffle($str)
  {
    // break the string into pieces via self::split(), then randomize their order in place
    $pieces = self::split($str);
    shuffle($pieces);

    return implode('', $pieces);
  }
4722
4723
  /**
4724
   * Sort all characters according to code points.
4725 4
   *
4726 4
   * @param    string $str    A UTF-8 string.
4727 2
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
4728 2
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
4729
   *
4730 2
   * @return   string String of sorted characters
4731 2
   */
4732 1
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // drop repeated code points before sorting
      $codePoints = array_unique($codePoints);
    }

    // order by value; keys are irrelevant because self::string() only joins the values
    if ($desc) {
      arsort($codePoints);
    } else {
      asort($codePoints);
    }

    return self::string($codePoints);
  }
4748
4749
  /**
4750
   * Convert a string to an array.
4751
   *
4752
   * @param string $str
4753
   * @param int    $len
4754
   *
4755
   * @return array
4756 1
   */
4757
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    if ($len < 1) {
      // invalid chunk length: defer to the native function so its own error handling applies
      return str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $clusters = array();
      $byteLength = strlen($str);
      // grapheme_extract() writes the next start position back into $bytePos (5th argument, by reference)
      for ($bytePos = 0; $bytePos < $byteLength;) {
        $clusters[] = \grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $bytePos, $bytePos);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $clusters = $matches[0];
    }

    if ($len === 1) {
      return $clusters;
    }

    // glue every $len consecutive clusters together into one output element
    $chunks = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $chunks[] = implode('', $group);
    }

    return $chunks;
  }
4797
4798
  /**
4799
   * Get a binary representation of a specific character.
4800
   *
4801
   * @param   string $str The input character.
4802
   *
4803
   * @return  string
4804
   */
4805 8
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $bits = '';

    // walk the string byte by byte and append self::ord() of each byte as zero-padded binary
    foreach (str_split($str) as $byte) {
      $bits .= vsprintf('%08b', (array)self::ord($byte));
    }

    return $bits;
  }
4824
4825 5
  /**
4826
   * US-ASCII transliterations of Unicode text.
4827
   *
4828
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
4829 5
   * Warning: you should only pass this well formed UTF-8!
4830
   * Be aware it works by making a copy of the input string which it appends transliterated
4831
   * characters to - it uses a PHP output buffer to do this - it means, memory use will increase,
4832 5
   * requiring up to the same amount again as the input string
4833
   *
4834
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
4835
   *
4836 5
   * @author <[email protected]>
4837 5
   *
4838
   * @param string $str     UTF-8 string to convert
4839
   * @param string $unknown Character to use if a character is unknown (default is ?).
4840
   *
4841
   * @return string US-ASCII string
4842
   */
4843
  public static function str_transliterate($str, $unknown = '?')
  {
    // lazily filled cache of transliteration tables, keyed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    // split the string into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = isset($ar[0]) ? $ar[0] : array();

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // 0x80-0xBF is a stray continuation byte and 0xFE/0xFF never start a sequence
      if ($ordC0 < 192 || $ordC0 >= 254) {
        $c = $unknown;
        continue;
      }

      // derive the number of continuation bytes and the payload of the lead byte;
      // $ord is assigned fresh for every character, so a value decoded for a
      // previous character can never leak into this one
      if ($ordC0 >= 252) {
        $trail = 5;
        $ord = $ordC0 - 252;
      } elseif ($ordC0 >= 248) {
        $trail = 4;
        $ord = $ordC0 - 248;
      } elseif ($ordC0 >= 240) {
        $trail = 3;
        $ord = $ordC0 - 240;
      } elseif ($ordC0 >= 224) {
        $trail = 2;
        $ord = $ordC0 - 224;
      } else {
        $trail = 1;
        $ord = $ordC0 - 192;
      }

      // truncated sequence: do not read undefined string offsets
      if (!isset($c[$trail])) {
        $c = $unknown;
        continue;
      }

      // every continuation byte contributes 6 bits (value - 128)
      for ($i = 1; $i <= $trail; ++$i) {
        $ord = $ord * 64 + (ord($c[$i]) - 128);
      }

      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): presumably the bank file fills $UTF8_TO_ASCII[$bank] - confirm against the data files
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        // no table file, or the file did not define this bank: remember an empty table
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
4935 7
4936
  /**
4937 7
   * Counts number of words in the UTF-8 string.
4938 1
   *
4939 1
   * @param string $str    The input string.
4940 1
   * @param int    $format <strong>0</strong> => return a number of words<br />
4941
   *                       <strong>1</strong> => return an array of words
4942
   *                       <strong>2</strong> => return an array of words with word-offset as key
4943 7
   * @param string $charlist
4944 1
   *
4945 1
   * @return array|float The number of words in the string
4946
   */
4947 7
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');

    // with PREG_SPLIT_DELIM_CAPTURE the captured words sit on the odd indexes,
    // the text between them on the even ones
    $pieces = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    $total = count($pieces);

    if ($format === 1) {
      // plain list of words
      $words = array();
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[] = $pieces[$idx];
      }

      return $words;
    }

    if ($format === 2) {
      self::checkForSupport();

      // words keyed by their offset, measured with self::strlen()
      $words = array();
      $position = self::strlen($pieces[0]);
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[$position] = $pieces[$idx];
        $position += self::strlen($pieces[$idx]) + self::strlen($pieces[$idx + 1]);
      }

      return $words;
    }

    // any other format: only the number of words
    return ($total - 1) / 2;
  }
4980
4981
  /**
4982
   * Case-insensitive string comparison.
4983
   *
4984
   * @param string $str1
4985 66
   * @param string $str2
4986
   *
4987 66
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
4988
   */
4989 66
  public static function strcasecmp($str1, $str2)
  {
    // case-fold both operands, then run the regular comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4993
4994 65
  /**
4995
   * String comparison.
4996
   *
4997 65
   * @param string $str1
4998
   * @param string $str2
4999
   *
5000
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
5001 65
   *              <strong>> 0</strong> if str1 is greater than str2<br />
5002
   *              <strong>0</strong> if they are equal.
5003
   */
5004
  public static function strcmp($str1, $str2)
  {
    // identical strings need no normalization at all
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // otherwise compare the NFD-normalized forms with the native strcmp()
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5011
5012
  /**
5013
   * Find length of initial segment not matching mask.
5014
   *
5015
   * @param string $str
5016
   * @param string $charList
5017 1
   * @param int    $offset
5018
   * @param int    $length
5019 1
   *
5020
   * @return int|null
5021
   */
5022
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;

    // an empty mask yields null
    if ($charList === '') {
      return null;
    }

    // only cut the string when a non-default offset or length was requested
    $str = ($offset || 2147483647 !== $length)
        ? (string)self::substr($str, $offset, $length)
        : (string)$str;

    // lazily capture everything in front of the first character belonging to the mask
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $match)) {
      return self::strlen($match[1]);
    }

    // no mask character found: the whole string counts
    return self::strlen($str);
  }
5041
5042
  /**
5043
   * Makes a UTF-8 string from code points.
5044
   *
5045
   * @param    array $array Integer or Hexadecimal codepoints
5046
   *
5047
   * @return   string UTF-8 encoded string
5048
   */
5049
  public static function string($array)
  {
    // callback pointing at UTF8::chr(), applied to every code point
    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );
    $characters = array_map($toChar, $array);

    return implode($characters);
  }
5061
5062
  /**
5063
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
5064
   *
5065
   * @param    string $str The input string.
5066
   *
5067
   * @return   bool True if the string has BOM at the start, False otherwise.
5068
   */
5069
  public static function string_has_bom($str)
  {
    // the candidate BOM byte sequences are the keys of self::$bom
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5079
5080
  /**
5081
   * Strip HTML and PHP tags from a string.
5082
   *
5083
   * @link http://php.net/manual/en/function.strip-tags.php
5084
   *
5085
   * @param string $str            <p>
5086
   *                               The input string.
5087
   *                               </p>
5088
   * @param string $allowable_tags [optional] <p>
5089
   *                               You can use the optional second parameter to specify tags which should
5090
   *                               not be stripped.
5091
   *                               </p>
5092
   *                               <p>
5093
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
5094
   *                               can not be changed with allowable_tags.
5095
   *                               </p>
5096
   *
5097
   * @return string the stripped string.
5098
   */
5099
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 before handing the string to the native function
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
5106 11
5107
  /**
5108 11
   * Finds position of first occurrence of a string within another, case insensitive.
5109 2
   *
5110
   * @link http://php.net/manual/en/function.mb-stripos.php
5111
   *
5112
   * @param string  $haystack  <p>
5113 10
   *                           The string from which to get the position of the first occurrence
5114 10
   *                           of needle
5115
   *                           </p>
5116
   * @param string  $needle    <p>
5117
   *                           The string to find in haystack
5118 10
   *                           </p>
5119
   * @param int     $offset    [optional] <p>
5120
   *                           The position in haystack
5121
   *                           to start searching
5122 10
   *                           </p>
5123
   * @param string  $encoding
5124
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
5125
   *
5126 1
   * @return int Return the numeric position of the first occurrence of
5127 1
   * needle in the haystack
5128 1
   * string, or false if needle is not found.
5129
   */
5130 10
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be found in, or with, an empty string
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // the parameter defaults to null, but \mb_stripos() expects an integer offset
    // (passing null there is deprecated as of PHP 8.1); null becomes 0, the native default
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalizeEncoding($encoding);
    }

    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5156
5157
  /**
5158
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
5159
   *
5160
   * @param string $str
5161
   * @param string $needle
5162
   * @param bool   $before_needle
5163
   *
5164
   * @return false|string
5165
   */
5166
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = (string)$needle;

    // an empty needle can never be found
    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    $found = \mb_stristr($str, $needle, $before_needle, 'UTF-8');

    return $found;
  }
5177
5178
  /**
5179
   * Get the string length, not the byte-length!
5180
   *
5181
   * @link     http://php.net/manual/en/function.mb-strlen.php
5182
   *
5183
   * @param string  $str       The string being checked for length.
5184
   * @param string  $encoding  Set the charset for e.g. "\mb_" function
5185
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
5186 1
   *
5187
   * @return int the number of characters in
5188 1
   *           string str having character encoding
5189
   *           encoding. A multi-byte character is
5190 1
   *           counted as 1.
5191
   */
5192
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: this is only a fallback for old versions (a boolean in the encoding slot)
    $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

    // for these two encodings the native byte count is returned directly
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    self::checkForSupport();

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5221
5222
  /**
5223
   * Case insensitive string comparisons using a "natural order" algorithm.
5224
   *
5225
   * @param string $str1
5226
   * @param string $str2
5227
   *
5228
   * @return int <strong>< 0</strong> if str1 is less than str2<br />
5229
   *             <strong>> 0</strong> if str1 is greater than str2<br />
5230
   *             <strong>0</strong> if they are equal
5231
   */
5232
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both operands, then compare them in natural order
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5236
5237 1
  /**
5238
   * String comparisons using a "natural order" algorithm
5239
   *
5240
   * @link  http://php.net/manual/en/function.strnatcmp.php
5241
   *
5242
   * @param string $str1 <p>
5243
   *                     The first string.
5244
   *                     </p>
5245
   * @param string $str2 <p>
5246
   *                     The second string.
5247
   *                     </p>
5248
   *
5249 1
   * @return int Similar to other string comparison functions, this one returns &lt; 0 if
5250
   * str1 is less than str2; &gt;
5251 1
   * 0 if str1 is greater than
5252
   * str2, and 0 if they are equal.
5253
   * @since 4.0
5254
   * @since 5.0
5255
   */
5256
  public static function strnatcmp($str1, $str2)
  {
    // identical strings are equal without any folding
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // otherwise compare the strtonatfold()-ed forms with the native natural-order comparison
    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5260
5261
  /**
5262
   * Binary safe case-insensitive string comparison of the first n characters
5263
   *
5264
   * @link  http://php.net/manual/en/function.strncasecmp.php
5265
   *
5266
   * @param string $str1 <p>
5267
   *                     The first string.
5268
   *                     </p>
5269
   * @param string $str2 <p>
5270
   *                     The second string.
5271
   *                     </p>
5272
   * @param int    $len  <p>
5273
   *                     The length of strings to be used in the comparison.
5274
   *                     </p>
5275
   *
5276 10
   * @return int &lt; 0 if <i>str1</i> is less than
5277
   * <i>str2</i>; &gt; 0 if <i>str1</i> is
5278 10
   * greater than <i>str2</i>, and 0 if they are equal.
5279 10
   * @since 4.0.4
5280
   * @since 5.0
5281 10
   */
5282 2
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both operands, then compare their first $len characters
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5286 9
5287
  /**
5288 9
   * Binary safe string comparison of the first n characters
5289
   *
5290
   * @link  http://php.net/manual/en/function.strncmp.php
5291
   *
5292 9
   * @param string $str1 <p>
5293 9
   *                     The first string.
5294
   *                     </p>
5295 9
   * @param string $str2 <p>
5296
   *                     The second string.
5297
   *                     </p>
5298 1
   * @param int    $len  <p>
5299 1
   *                     Number of characters to use in the comparison.
5300 1
   *                     </p>
5301
   *
5302 9
   * @return int &lt; 0 if <i>str1</i> is less than
5303 9
   * <i>str2</i>; &gt; 0 if <i>str1</i>
5304
   * is greater than <i>str2</i>, and 0 if they are
5305
   * equal.
5306
   * @since 4.0
5307
   * @since 5.0
5308
   */
5309
  public static function strncmp($str1, $str2, $len)
  {
    // cut both strings down to their first $len characters, then compare the prefixes
    $prefix1 = self::substr($str1, 0, $len);
    $prefix2 = self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
5313
5314
  /**
5315
   * Search a string for any of a set of characters
5316
   *
5317
   * @link  http://php.net/manual/en/function.strpbrk.php
5318
   *
5319
   * @param string $haystack  <p>
5320
   *                          The string where char_list is looked for.
5321
   *                          </p>
5322
   * @param string $char_list <p>
5323
   *                          This parameter is case sensitive.
5324
   *                          </p>
5325
   *
5326
   * @return string a string starting from the character found, or false if it is
5327
   * not found.
5328
   * @since 5.0
5329
   */
5330
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // look for the first character that belongs to the list
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $found)) {
      return false;
    }

    // return the tail of the haystack starting at that character (byte-wise cut)
    return substr($haystack, strpos($haystack, $found[0]));
  }
5345 6
5346
  /**
5347
   * Find position of first occurrence of string in a string.
5348
   *
5349
   * @link http://php.net/manual/en/function.mb-strpos.php
5350
   *
5351
   * @param string  $haystack     <p>
5352
   *                              The string being checked.
5353
   *                              </p>
5354
   * @param string  $needle       <p>
5355
   *                              The string to search for in haystack.
5356
   *                              </p>
5357
   * @param int     $offset       [optional] <p>
5358
   *                              The search offset. If it is not specified, 0 is used.
5359
   *                              </p>
5360
   * @param string  $encoding
5361
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string.
5362
   *
5363
   * @return int The numeric position of the first occurrence of needle in the haystack string.<br />
5364
   *             If needle is not found it returns false.
5365
   */
5366 1
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be found in, or with, an empty string
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    // NOTE: $needle is always a string at this point, so an integer needle is searched
    // for as its decimal text. The former "integer needle => chr()" branch compared
    // (int)$needle === $needle on a string, could never run, and was removed.

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalizeEncoding($encoding);
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // grapheme_strpos() belongs to the intl extension, so it is guarded by the intl flag
    if (self::$support['intl'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback without extensions: search byte-wise, then convert the byte position
    // into a character position by measuring the text in front of the match
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5424 20
5425
  /**
5426 20
   * Finds the last occurrence of a character in a string within another.
5427
   *
5428 20
   * @link http://php.net/manual/en/function.mb-strrchr.php
5429 5
   *
5430
   * @param string $haystack <p>
5431
   *                         The string from which to get the last occurrence
5432
   *                         of needle
5433 18
   *                         </p>
5434
   * @param string $needle   <p>
5435 18
   *                         The string to find in haystack
5436
   *                         </p>
5437
   * @param bool   $part     [optional] <p>
5438
   *                         Determines which portion of haystack
5439
   *                         this function returns.
5440
   *                         If set to true, it returns all of haystack
5441
   *                         from the beginning to the last occurrence of needle.
5442
   *                         If set to false, it returns all of haystack
5443
   *                         from the last occurrence of needle to the end,
5444
   *                         </p>
5445 3
   * @param string $encoding [optional] <p>
5446
   *                         Character encoding name to use.
5447 3
   *                         If it is omitted, internal character encoding is used.
5448
   *                         </p>
5449
   *
5450
   * @return string the portion of haystack.
5451
   * or false if needle is not found.
5452
   */
5453 View Code Duplication
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    // normalize the encoding name, then delegate to the mbstring function
    return \mb_strrchr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5460
5461
  /**
5462 16
   * Reverses characters order in the string.
5463
   *
5464 16
   * @param    string $str The input string
5465
   *
5466 16
   * @return   string The string with characters in the reverse sequence
5467 4
   */
5468
  public static function strrev($str)
  {
    // split into pieces via self::split(), flip their order, and join them again
    $pieces = self::split($str);
    $reversed = array_reverse($pieces);

    return implode($reversed);
  }
5472
5473 15
  /**
5474 15
   * Finds the last occurrence of a character in a string within another, case insensitive.
5475
   *
5476
   * @link http://php.net/manual/en/function.mb-strrichr.php
5477
   *
5478
   * @param string $haystack <p>
5479
   *                         The string from which to get the last occurrence
5480
   *                         of needle
5481
   *                         </p>
5482
   * @param string $needle   <p>
5483
   *                         The string to find in haystack
5484
   *                         </p>
5485
   * @param bool   $part     [optional] <p>
5486
   *                         Determines which portion of haystack
5487
   *                         this function returns.
5488
   *                         If set to true, it returns all of haystack
5489
   *                         from the beginning to the last occurrence of needle.
5490
   *                         If set to false, it returns all of haystack
5491
   *                         from the last occurrence of needle to the end,
5492
   *                         </p>
5493
   * @param string $encoding [optional] <p>
5494
   *                         Character encoding name to use.
5495
   *                         If it is omitted, internal character encoding is used.
5496
   *                         </p>
5497
   *
5498
   * @return string the portion of haystack.
5499
   * or false if needle is not found.
5500
   */
5501 View Code Duplication
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // Run the class-wide support check before touching any mb_* function.
    self::checkForSupport();

    // Case-insensitive counterpart of strrchr(): delegate to mb_strrichr() with the normalized encoding.
    return \mb_strrichr(
        $haystack,
        $needle,
        $part,
        self::normalizeEncoding($encoding)
    );
  }
5508
5509
  /**
5510
   * Find position of last occurrence of a case-insensitive string.
5511
   *
5512
   * @param    string $haystack The string to look in
5513
   * @param    string $needle   The string to look for
5514
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
5515
   *
5516
   * @return   int The position of offset
5517
   */
5518
  public static function strripos($haystack, $needle, $offset = 0)
  {
    // Lower-case both operands, then reuse the case-sensitive strrpos().
    $haystackLower = self::strtolower($haystack);
    $needleLower = self::strtolower($needle);

    return self::strrpos($haystackLower, $needleLower, $offset);
  }
5522
5523
  /**
5524
   * Find position of last occurrence of a string in a string.
5525
   *
5526
   * @link http://php.net/manual/en/function.mb-strrpos.php
5527
   *
5528
   * @param string     $haystack  <p>
5529
   *                              The string being checked, for the last occurrence
5530 1
   *                              of needle
5531
   *                              </p>
5532
   * @param string|int $needle    <p>
5533 1
   *                              The string to find in haystack.
5534
   *                              Or a code point as int.
5535 1
   *                              </p>
5536
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters into
5537
   *                              the string. Negative values will stop searching at an arbitrary point
5538
   *                              prior to the end of the string.
5539
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
5540
   *
5541
   * @return int the numeric position of
5542
   * the last occurrence of needle in the
5543
   * haystack string. If
5544
   * needle is not found, it returns false.
5545
   */
5546
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // A non-negative integer needle is interpreted as a code point and turned into its character.
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    // An empty haystack or an empty needle can never produce a match.
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Preferred backend: mbstring.
    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): this branch is guarded by the 'iconv' flag but calls a grapheme_* function - confirm intent.
    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: narrow the haystack according to the offset, then search byte-wise
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);

    if ($bytePos === false) {
      return false;
    }

    // Convert the byte position into a character position by measuring the text in front of the match.
    $prefix = substr($haystack, 0, $bytePos);

    // Only a positive offset shifted the start of the haystack; a negative one trimmed its end.
    $skipped = $offset > 0 ? $offset : 0;

    return $skipped + self::strlen($prefix);
  }
5598
5599
  /**
5600
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5601
   * mask.
5602
   *
5603
   * @param string $str
5604
   * @param string $mask
5605
   * @param int    $offset
5606
   * @param int    $length
5607
   *
5608
   * @return int|null
5609
   */
5610
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // Only cut the subject down when a non-default offset or length was requested.
    $isWindowed = $offset || $length !== 2147483647;

    if ($isWindowed) {
      $str = self::substr($str, $offset, $length);
    }

    // Anchor at the start and match the longest run described by the pattern fragment from rxClass().
    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $str, $hits)) {
      return 0;
    }

    return self::strlen($hits[0]);
  }
5618 1
5619
  /**
5620 1
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5621 1
   *
5622
   * @link http://php.net/manual/en/function.grapheme-strstr.php
5623 1
   *
5624
   * @param string $haystack      <p>
5625
   *                              The input string. Must be valid UTF-8.
5626
   *                              </p>
5627
   * @param string $needle        <p>
5628
   *                              The string to look for. Must be valid UTF-8.
5629
   *                              </p>
5630
   * @param bool   $before_needle [optional] <p>
5631
   *                              If <b>TRUE</b>, grapheme_strstr() returns the part of the
5632
   *                              haystack before the first occurrence of the needle (excluding the needle).
5633
   *                              </p>
5634
   *
5635
   * @return string the portion of string, or FALSE if needle is not found.
5636
   */
5637
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // Run the class-wide support check, then delegate to grapheme_strstr().
    self::checkForSupport();

    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5643
5644
  /**
5645
   * Unicode transformation for case-less matching.
5646
   *
5647
   * @link http://unicode.org/reports/tr21/tr21-5.html
5648
   *
5649
   * @param string $str
5650
   * @param bool   $full
5651
   *
5652
   * @return string
5653
   */
5654
  public static function strtocasefold($str, $full = true)
  {
    // Replacement tables are built on first use and kept in static storage for later calls.
    static $commonSearch = null;
    static $commonReplace = null;
    static $fullTable = null;

    if (null === $commonSearch) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    // Step 1: apply the common case-folding pairs.
    $folded = str_replace($commonSearch, $commonReplace, $str);

    // Step 2 (optional): apply the full case-folding data set.
    if ($full) {
      if (null === $fullTable) {
        $fullTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    // Step 3: clean the result and lower-case it.
    return self::strtolower(self::clean($folded));
  }
5681 1
5682 1
  /**
5683 1
   * (PHP 4 &gt;= 4.3.0, PHP 5)<br/>
5684 1
   * Make a string lowercase.
5685 1
   *
5686 1
   * @link http://php.net/manual/en/function.mb-strtolower.php
5687
   *
5688
   * @param string $str <p>
5689
   *                    The string being lowercased.
5690 1
   *                    </p>
5691 1
   * @param string $encoding
5692 1
   *
5693 1
   * @return string str with all alphabetic characters converted to lowercase.
5694 1
   */
5695 1
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Nothing to convert in an empty string.
    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    return \mb_strtolower($str, self::normalizeEncoding($encoding));
  }
5709 6
5710 1
  /**
5711 1
   * Generic case sensitive transformation for collation matching.
5712 1
   *
5713 1
   * @param string $s
5714
   *
5715 1
   * @return string
5716
   */
5717
  protected static function strtonatfold($s)
  {
    // Normalize to NFD first ...
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    // ... then remove every run of characters in the Unicode category Mn (non-spacing marks).
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5721 6
5722 4
  /**
5723
   * Make a string uppercase.
5724 4
   *
5725 4
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5726
   *
5727 6
   * @param string $str <p>
5728
   *                    The string being uppercased.
5729 6
   *                    </p>
5730
   * @param string $encoding
5731
   *
5732
   * @return string str with all alphabetic characters converted to uppercase.
5733
   */
5734
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // Lookup tables for the non-mbstring fallback; filled on first use.
    static $upperSearch = null;
    static $upperReplace = null;

    $str = (string)$str;

    // Nothing to convert in an empty string.
    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    // Preferred backend: mbstring.
    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, self::normalizeEncoding($encoding));
    }

    // fallback: replace via the table returned by self::case_table()
    if (null === $upperSearch) {
      $table = self::case_table();
      $upperSearch = array_keys($table);
      $upperReplace = array_values($table);
    }

    return str_replace($upperSearch, $upperReplace, self::clean($str));
  }
5767
5768
  /**
5769
   * Translate characters or replace sub-strings.
5770
   *
5771
   * @link  http://php.net/manual/en/function.strtr.php
5772
   *
5773
   * @param string       $str  <p>
5774
   *                           The string being translated.
5775 6
   *                           </p>
5776
   * @param string|array $from <p>
5777 6
   *                           The string replacing from.
5778
   *                           </p>
5779
   * @param string|array $to   <p>
5780
   *                           The string being translated to.
5781
   *                           </p>
5782
   *
5783
   * @return string This function returns a copy of str,
5784
   * translating all occurrences of each character in
5785
   * from to the corresponding character in
5786
   * to.
5787
   * @since 4.0
5788
   * @since 5.0
5789
   */
5790
  public static function strtr($str, $from, $to = INF)
  {
    // Two call forms are supported:
    //  - strtr($str, $pairs):     $from is passed straight to the native strtr() as the replacement map.
    //  - strtr($str, $from, $to): $from / $to are matched up position by position.
    // INF is the sentinel meaning "no $to was given".
    if (INF !== $to) {
      // Only strings are split via self::str_split(); arrays are already lists of
      // search / replacement items and must not be handed to a string-only splitter.
      $from = is_array($from) ? array_values($from) : self::str_split($from);
      $to = is_array($to) ? array_values($to) : self::str_split($to);

      $countFrom = count($from);
      $countTo = count($to);

      // Surplus entries on the longer side have no partner and are ignored.
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      // Build the "search => replacement" map expected by the native two-argument strtr().
      $from = array_combine($from, $to);
    }

    return strtr($str, $from);
  }
5809
5810
  /**
5811
   * Return the width of a string.
5812 7
   *
5813
   * @param string $s
5814 7
   *
5815
   * @return int
5816 7
   */
5817
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    // The width is always computed with UTF-8 as the encoding.
    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5824 6
5825 3
  /**
5826
   * Get part of a string.
5827 3
   *
5828
   * @link http://php.net/manual/en/function.mb-substr.php
5829 3
   *
5830
   * @param string  $str       <p>
5831
   *                           The string being checked.
5832 3
   *                           </p>
5833
   * @param int     $start     <p>
5834 3
   *                           The first position used in str.
5835 3
   *                           </p>
5836
   * @param int     $length    [optional] <p>
5837
   *                           The maximum length of the returned string.
5838 3
   *                           </p>
5839 3
   * @param string  $encoding
5840 3
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
5841
   *
5842
   * @return string mb_substr returns the portion of
5843
   * str specified by the start and length parameters.
5844
   */
5845
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // An empty input always yields an empty result.
    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Without an explicit length, use the full length of the string.
    $length = (int)($length === null ? self::strlen($str) : $length);

    // Preferred backend: mbstring.
    if (self::$support['mbstring'] === true) {
      // INFO: this is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

      return \mb_substr($str, $start, $length, $encoding);
    }

    // NOTE(review): this branch is guarded by the 'iconv' flag but calls a grapheme_* function - confirm intent.
    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback: work on the array produced by self::split(), take the requested slice and join it again
    $chars = self::split($str);
    $slice = array_slice($chars, $start, $length);

    return implode($slice);
  }
5893
5894
  /**
5895
   * Binary safe comparison of two strings from an offset, up to length characters.
5896
   *
5897
   * @param string  $main_str           The main string being compared.
5898
   * @param string  $str                The secondary string being compared.
5899
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
5900
   *                                    end of the string.
5901
   * @param int     $length             The length of the comparison. The default value is the largest of the length of
5902
   *                                    the str compared to the length of main_str less the offset.
5903 2
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
5904
   *
5905 2
   * @return int
5906
   */
5907
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // Cut the requested window out of the main string ...
    $mainPart = self::substr($main_str, $offset, $length);

    // ... and trim the other string to the same number of characters.
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5914
5915
  /**
5916
   * Count the number of substring occurrences
5917
   *
5918
   * @link  http://php.net/manual/en/function.substr-count.php
5919
   *
5920
   * @param string $haystack <p>
5921
   *                         The string to search in
5922
   *                         </p>
5923
   * @param string $needle   <p>
5924
   *                         The substring to search for
5925
   *                         </p>
5926
   * @param int    $offset   [optional] <p>
5927
   *                         The offset where to start counting
5928
   *                         </p>
5929 20
   * @param int    $length   [optional] <p>
5930
   *                         The maximum length after the specified offset to search for the
5931 20
   *                         substring. It outputs a warning if the offset plus the length is
5932 2
   *                         greater than the haystack length.
5933
   *                         </p>
5934 2
   *
5935 2
   * @return int This function returns an integer.
5936
   * @since 4.0
5937 2
   * @since 5.0
5938
   */
5939
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    // Counts how often $needle occurs in $haystack, optionally restricted to the
    // window that starts at $offset and spans $length characters.
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle cannot contain / produce any occurrence.
    if (!isset($haystack[0], $needle[0])) {
      return 0;
    }

    // Restrict the haystack when a window was requested. $length is compared against
    // null explicitly so that an explicit length of 0 is honoured as "empty window".
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      // Keep null as null: casting it to int would yield 0 and make self::substr()
      // return an empty string, so every call with only an offset would count 0.
      if ($length !== null) {
        $length = (int)$length;
      }

      $haystack = self::substr($haystack, $offset, $length);
    }

    self::checkForSupport();

    // Pass the encoding explicitly instead of relying on mbstring's internal encoding setting.
    return \mb_substr_count($haystack, $needle, 'UTF-8');
  }
5959
5960 18
  /**
5961 17
   * Replace text within a portion of a string.
5962 17
   *
5963 17
   * source: https://gist.github.com/stemar/8287074
5964 5
   *
5965 5
   * @param string|array   $str
5966 5
   * @param string|array   $replacement
5967
   * @param int|array      $start
5968
   * @param null|int|array $length
5969 20
   *
5970
   * @return array|string
5971 18
   */
5972 14
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    // Replaces $length characters of $str, beginning at character $start, with $replacement.
    // When $str is an array, every element is processed through a recursive call and an
    // array is returned; otherwise a string is returned.
    if (is_array($str)) {
      $num = count($str);

      // $replacement: one entry per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one entry per input string, non-integer entries fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one entry per input string
      // NOTE(review): an omitted length becomes 0 here (pure insertion), while the scalar
      // path below treats an omitted length as "up to the end of the string" - confirm intent.
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // Scalar subject with an array of replacements: only the first replacement is used.
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // Split subject and replacement into characters (the "u" modifier makes "." match whole UTF-8 characters).
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    // Without an explicit length everything from $start to the end is replaced. The character
    // count comes from the split above, so it always agrees with the array that gets spliced
    // and does not depend on mbstring's internal encoding setting.
    if ($length === null) {
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // Separator first, array second: the reversed legacy order is rejected by PHP 8.
    return implode('', $smatches[0]);
  }
6037
6038
  /**
6039
   * Returns a case swapped version of the string.
6040
   *
6041
   * @param string $str
6042
   * @param string $encoding
6043 2
   *
6044
   * @return string each character's case swapped
6045 2
   */
6046
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Nothing to swap in an empty string.
    if ('' === $str) {
      return '';
    }

    $encoding = self::normalizeEncoding($encoding);
    $str = self::clean($str);

    // Visit every non-whitespace character: if upper-casing leaves it unchanged,
    // return its lower-case form, otherwise return the upper-case form.
    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6073 26
6074
  /**
6075 26
   * alias for "UTF8::to_ascii()"
6076
   *
6077 26
   * @param string $s The input string e.g. a UTF-8 String
6078 5
   * @param string $subst_chr
6079
   *
6080
   * @return string
6081
   */
6082 22
  public static function toAscii($s, $subst_chr = '?')
  {
    // camelCase alias: forwards both arguments unchanged to to_ascii().
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
6086 16
6087
  /**
6088
   * alias for "UTF8::to_latin1()"
6089
   *
6090
   * @param $str
6091
   *
6092
   * @return string
6093
   */
6094
  public static function toLatin1($str)
  {
    // camelCase alias: forwards the argument unchanged to to_latin1().
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
6098 14
6099
  /**
6100
   * alias for "UTF8::to_utf8"
6101
   *
6102
   * @param string $str
6103
   *
6104
   * @return string
6105
   */
6106
  public static function toUTF8($str)
  {
    // camelCase alias: forwards the argument unchanged to to_utf8().
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
6110
6111
  /**
6112
   * convert to ASCII
6113
   *
6114
   * @param string $s The input string e.g. a UTF-8 String
6115
   * @param string $subst_chr
6116
   *
6117
   * @return string
6118
   */
6119
  public static function to_ascii($s, $subst_chr = '?')
  {
    // Converts $s to ASCII by transliterating every multi-byte character; characters
    // for which no replacement is found end up as $subst_chr (or whatever
    // self::str_transliterate() returns for them).

    // Extra transliteration table, loaded on first use via self::getData('translit_extra').
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // No byte >= 0x80 means the string is already plain ASCII.
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = \Normalizer::normalize($s, \Normalizer::NFKC);

    // The iconv target charset string differs depending on the iconv implementation.
    $glibc = 'glibc' === ICONV_IMPL;

    preg_match_all('/./u', $s, $matches);
    $chars = $matches[0];

    foreach ($chars as &$c) {

      // Single-byte characters are left untouched.
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if ($t !== false && is_string($t)) {
          if (!isset($t[0])) {
            // iconv produced nothing: mark the character as "not transliterated"
            $t = '?';
          } elseif (isset($t[1])) {
            // presumably strips accent markers that iconv put in front of the base letter - TODO confirm
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // A failed iconv() call returns false; treat it like an untransliterated character
      // so the fallbacks below run instead of the character silently disappearing.
      if ($t === false) {
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = \Normalizer::normalize($c, \Normalizer::NFD);

          // Keep the leading byte of the decomposed form if it is ASCII. The string check
          // guards against normalize() failing and returning false.
          if (is_string($t) && isset($t[0]) && $t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      // Still unresolved: last resort is self::str_transliterate().
      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // Break the reference left behind by the by-reference foreach.
    unset($c);

    return implode('', $chars);
  }
6190 1
6191 1
  /**
6192
   * alias for "UTF8::to_win1252()"
6193 1
   *
6194
   * @param   string $str
6195
   *
6196
   * @return  array|string
6197
   */
6198
  public static function to_iso8859($str)
  {
    // Alias: forwards the argument unchanged to to_win1252().
    $converted = self::to_win1252($str);

    return $converted;
  }
6202
6203
  /**
6204
   * alias for "UTF8::to_win1252()"
6205
   *
6206
   * @param string|array $str
6207
   *
6208
   * @return string|array
6209
   */
6210
  public static function to_latin1($str)
  {
    // Alias: forwards the argument unchanged to to_win1252().
    $converted = self::to_win1252($str);

    return $converted;
  }
6214
6215
  /**
6216
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
6217
   *
6218
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
6219
   *
6220
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
6221
   *
6222
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
6223
   *    are followed by any of these:  ("group B")
6224
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
6225
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
6226
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
6227
   * is also a valid unicode character, and will be left unchanged.
6228
   *
6229
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
6230
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
6231
   *
6232
   * @param string|array $str Any string or array.
6233
   *
6234
   * @return string The same string, but UTF8 encoded.
6235
   */
6236
  public static function to_utf8($str)
  {
    // Arrays are converted element by element; the recursive call also covers nested arrays.
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 7-bit ASCII: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      if ($ordC1 >= 0xc0) {
        // Possible UTF-8 lead byte: how many continuation bytes would have to follow?
        if ($ordC1 <= 0xdf) {
          $following = 1; // looks like 2 bytes UTF8
        } elseif ($ordC1 <= 0xef) {
          $following = 2; // looks like 3 bytes UTF8
        } elseif ($ordC1 <= 0xf7) {
          $following = 3; // looks like 4 bytes UTF8
        } else {
          $following = 0; // 0xF8-0xFF: doesn't look like UTF8, but should be converted
        }

        $looksLikeUtf8 = $following > 0;
        for ($j = 1; $j <= $following; $j++) {
          // A byte past the end of the string counts as 0x00, i.e. not a continuation byte.
          $ordNext = $i + $j < $max ? ord($str[$i + $j]) : 0;
          if ($ordNext < 0x80 || $ordNext > 0xbf) {
            $looksLikeUtf8 = false;
            break;
          }
        }

        if ($looksLikeUtf8) {
          // yeah, almost sure it's UTF8 already: copy the whole sequence untouched
          $buf .= substr($str, $i, $following + 1);
          $i += $following;
          continue;
        }
      } elseif (isset(self::$win1252ToUtf8[$ordC1])) {
        // lone byte 0x80-0xBF that is one of the Windows-1252 special cases
        $buf .= self::$win1252ToUtf8[$ordC1];
        continue;
      }

      // Not valid UTF8: re-encode this single byte as a two-byte UTF-8 sequence.
      // Integer shift/mask is used instead of "ord() / 64", because the division
      // yields a float and passing a fractional float to chr() is deprecated on newer PHP.
      $buf .= chr(0xc0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3f));
    }

    self::checkForSupport();

    // decode unicode escape sequences (a literal backslash + "u" + 4 hex digits)
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode decimal numeric html-entities with 2 to 4 digits (e.g. "&#252;")
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6342
6343
  /**
   * Convert a string (or every element of an array) via "UTF8::utf8_decode()".
   *
   * Arrays are processed recursively and keep their keys.
   *
   * @param  string|array $str
   *
   * @return string|array An empty string is returned for empty input.
   */
  protected static function to_win1252($str)
  {
    if (!is_array($str)) {
      $str = (string)$str;

      return isset($str[0]) ? self::utf8_decode($str) : '';
    }

    $converted = array();
    foreach ($str as $key => $value) {
      $converted[$key] = self::to_win1252($value);
    }

    return $converted;
  }
6370
6371
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function would only be safe for pure 7-bit (ASCII) input, but
   * checking for that costs more time than it would save here.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // explicit character list given: delegate to the one-sided helpers
    if ($chars !== INF && $chars) {
      $leftTrimmed = self::ltrim($str, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // default: strip unicode separators (\pZ) and control / other characters (\pC)
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6399
6400
  /**
   * Upper-case the first character of a string.
   *
   * @param    string $str The input string
   *
   * @return   string The input with its first character passed through "UTF8::strtoupper()"
   */
  public static function ucfirst($str)
  {
    $firstChar = self::substr($str, 0, 1);
    $remainder = self::substr($str, 1);

    return self::strtoupper($firstChar) . $remainder;
  }
6411
6412
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @param string $str The input string
   *
   * @return string The result of "UTF8::ucfirst()"
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
6423
6424
  /**
   * Upper-case the first character of every space-separated word in the string.
   *
   * Words listed in $exceptions (exact, case-sensitive match) are left untouched.
   * The first character of the whole result is always upper-cased, even when the
   * first word is an exception.
   *
   * @param  string $str        The input string
   * @param  array  $exceptions Words that must not be changed
   *
   * @return string An empty string for empty input, otherwise the converted string
   */
  public static function ucwords($str, $exceptions = array())
  {
    // Same emptiness check as the sibling methods: a plain "!$str" would also
    // discard the valid input "0".
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;

    $newwords = array();
    foreach (explode(' ', $str) as $word) {
      if (!$useExceptions || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
6465
6466
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // url-decode once, then turn "%uXXXX" escapes into hexadecimal html-entities
    $decoded = urldecode($str);
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $decoded);

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags |= ENT_HTML5;
    }

    // $decoded is a string here, so "to_utf8()" takes its string path
    $utf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($utf8, $flags);
    $fixed = self::fix_simple_utf8(rawurldecode($withoutEntities));

    return (string)$fixed;
  }
6506
6507
  /**
   * Return an array that maps url-encoded Windows-1252 bytes ("%20" - "%FF")
   * to their UTF-8 characters.
   *
   * NOTE(review): the values for %7F, %81, %8D, %8F, %90, %9D, %A0 and %AD appear
   * as empty strings here; confirm against the upstream file whether a control /
   * non-printing character was intended for any of them.
   *
   * @return array Keys are upper-case "%XX" sequences, values are UTF-8 strings
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (it was mapped to a backtick before)
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6743
6744
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalised with "UTF8::to_utf8()", then the sequences
   * listed in "self::$utf8ToWin1252" are replaced, and the result is handed to
   * "Xml::utf8_decode()".
   *
   * @param string $str
   *
   * @return string An empty string for empty input
   */
  public static function utf8_decode($str)
  {
    // search / replace lists are built once and cached for later calls
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    $str = self::to_utf8($str);

    if (null === $search) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $str);

    return Xml::utf8_decode($mapped);
  }
6774
6775
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "utf8_encode()", the sequences listed in
   * "self::$cp1252ToUtf8" are replaced if the result contains a "\xC2" byte.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists are built once and cached for later calls
    static $search = null;
    static $replace = null;

    $encoded = \utf8_encode($str);

    // without a "\xC2" byte the replacement step is skipped
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if (null === $search) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6801
6802
  /**
   * Fix UTF-8 strings that contain wrongly converted Windows-1252 characters.
   *
   * Use this if you received an UTF-8 string that was converted from Windows-1252
   * as if it were ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string The result of "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6819
6820
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys,
   *         as defined in the above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6834
6835
  /**
   * Limit the number of words in a string.
   *
   * @param  string $str      The input string
   * @param  int    $words    Maximum number of words to keep
   * @param  string $strAddOn Appended when the string was actually shortened
   *
   * @return string An empty string for empty input or a limit below 1; the
   *                unchanged input when nothing had to be cut off
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $limit = (int)$words;

    if ($limit < 1) {
      return '';
    }

    // leading whitespace, then up to $limit runs of "non-whitespace + trailing whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $str, $matches);

    // shorten only if something matched and the match does not already cover the whole string
    if (isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0])) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
6870
6871
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is split into characters with "UTF8::split()". A one-byte-per-character
   * mask of the text is wrapped by the native "wordwrap()", and the break positions
   * found in that mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   The input string.
   * @param int    $width [optional] The column width.
   * @param string $break [optional] The line is broken using this string.
   * @param bool   $cut   [optional] If set to true, the string is always wrapped at or
   *                      before the specified width, so a word that is larger than the
   *                      given width is broken apart.
   *
   * @return string The given string wrapped at the specified column; an empty
   *                string if $str or $break is empty.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    $segments = explode($break, $str);
    $segmentCount = count($segments);

    if (1 === $segmentCount && '' === $segments[0]) {
      return '';
    }

    // Build the character list and a parallel mask:
    // '#' = existing break, ' ' = space, '?' = any other character.
    $mask = '';
    $glyphs = array();
    foreach ($segments as $index => $segment) {
      if ($index > 0) {
        $glyphs[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $mask .= (' ' === $glyph) ? ' ' : '?';
      }
    }
    unset($segments);

    // let the native function decide where the breaks go
    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $cursor = 0;   // index of the next unconsumed entry in $glyphs
    $maskPos = -1; // position in $mask that has been processed so far
    $breakPos = self::strpos($mask, '#', 0);

    while (false !== $breakPos) {
      // copy every character in front of this break
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $wrapped .= $glyphs[$cursor];
        unset($glyphs[$cursor]);
        ++$cursor;
      }

      // the character at the break position (old break or space) is replaced by $break
      if ($break === $glyphs[$cursor] || ' ' === $glyphs[$cursor]) {
        unset($glyphs[$cursor]);
        ++$cursor;
      }

      $wrapped .= $break;

      $breakPos = self::strpos($mask, '#', $breakPos + 1);
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $glyphs);
  }
6952
6953
  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return   array An array with the numeric code point as key and the White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6962
6963
}
6964