Completed
Push — master ( de7a7e...42bf5d )
by Lars
04:19
created

UTF8::chr()   A

Complexity

Conditions 4
Paths 5

Size

Total Lines 20
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 4

Importance

Changes 7
Bugs 1 Features 1
Metric Value
c 7
b 1
f 1
dl 0
loc 20
ccs 3
cts 3
cp 1
rs 9.2
cc 4
eloc 10
nc 5
nop 1
crap 4
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  protected static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
93
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
94
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
96
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  protected static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    // HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  protected static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  protected static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
232
   * @var array
233
   */
234
  protected static $brokenUtf8ToUtf8 = array(
235
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
236
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
237
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
238
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
239
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
240
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
241
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
242
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
243
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
244
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
245
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
246
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
247
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
248
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
249
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
250
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
251
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
252
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
253
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
254
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
255
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
256
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
257
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
258
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
259
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
260
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
261
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
262
      'ü'       => 'ü',
263
      'ä'       => 'ä',
264
      'ö'       => 'ö',
265
      'Ö'       => 'Ö',
266
      'ß'       => 'ß',
267
      'Ã '       => 'à',
268
      'á'       => 'á',
269
      'â'       => 'â',
270
      'ã'       => 'ã',
271
      'ù'       => 'ù',
272
      'ú'       => 'ú',
273
      'û'       => 'û',
274
      'Ù'       => 'Ù',
275
      'Ú'       => 'Ú',
276
      'Û'       => 'Û',
277
      'Ü'       => 'Ü',
278
      'ò'       => 'ò',
279
      'ó'       => 'ó',
280
      'ô'       => 'ô',
281
      'è'       => 'è',
282
      'é'       => 'é',
283
      'ê'       => 'ê',
284
      'ë'       => 'ë',
285
      'À'       => 'À',
286
      'Á'       => 'Á',
287
      'Â'       => 'Â',
288
      'Ã'       => 'Ã',
289
      'Ä'       => 'Ä',
290
      'Ã…'       => 'Å',
291
      'Ç'       => 'Ç',
292
      'È'       => 'È',
293
      'É'       => 'É',
294
      'Ê'       => 'Ê',
295
      'Ë'       => 'Ë',
296
      'ÃŒ'       => 'Ì',
297
      'Í'       => 'Í',
298
      'ÃŽ'       => 'Î',
299
      'Ï'       => 'Ï',
300
      'Ñ'       => 'Ñ',
301
      'Ã’'       => 'Ò',
302
      'Ó'       => 'Ó',
303
      'Ô'       => 'Ô',
304
      'Õ'       => 'Õ',
305
      'Ø'       => 'Ø',
306
      'Ã¥'       => 'å',
307
      'æ'       => 'æ',
308
      'ç'       => 'ç',
309
      'ì'       => 'ì',
310
      'í'       => 'í',
311
      'î'       => 'î',
312
      'ï'       => 'ï',
313
      'ð'       => 'ð',
314
      'ñ'       => 'ñ',
315
      'õ'       => 'õ',
316
      'ø'       => 'ø',
317
      'ý'       => 'ý',
318
      'ÿ'       => 'ÿ',
319
      '€'      => '€',
320
  );
321
322
  /**
323
   * @var array
324
   */
325
  protected static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  protected static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  protected static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790 1
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792 1
      'ISO-IR-230',
793 1
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803 1
804
  /**
   * Constructor.
   *
   * Creating an instance does nothing except invoke self::checkForSupport().
   * NOTE(review): checkForSupport() is defined elsewhere in this class;
   * presumably it populates the static $support cache - confirm there.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
811
812
  /**
   * Fetch the single character located at a given position of a string,
   * comparable to the $str[1] syntax.
   *
   * The lookup is delegated to self::substr() with a fixed length of 1.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of the character to return.
   *
   * @return   string Single Multi-Byte character.
   */
  public static function access($str, $pos)
  {
    $singleChar = self::substr($str, $pos, 1);

    return $singleChar;
  }
824
825
  /**
   * Ensure the given string starts with the UTF-8 BOM.
   *
   * INFO: The BOM from self::bom() is prepended only when
   *       self::string_has_bom() returns strictly false; in every other
   *       case the input string is handed back untouched.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that contains BOM
   */
  public static function add_bom_to_string($str)
  {
    // Anything other than a strict "false" counts as "BOM already present".
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
842
843
  /**
   * Provide the UTF-8 Byte Order Mark.
   *
   * @return string The three bytes 0xEF 0xBB 0xBF (UTF-8 Byte Order Mark)
   */
  public static function bom()
  {
    $utf8Bom = "\xEF\xBB\xBF";

    return $utf8Bom;
  }
852
853
  /**
   * @alias of UTF8::chr_map()
   *
   * Both arguments are forwarded unchanged, in the same order, to
   * self::chr_map() and its result is returned as-is.
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
865
866
  /**
867
   * Returns an array of all lower and upper case UTF-8 encoded characters.
868
   *
869
   * @return   array An array with lower case chars as keys and upper chars as values.
870
   */
871
  protected static function case_table()
872
  {
873
    static $case = array(
874
875
      // lower => upper
876
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
877
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
878
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
879
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
880
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
881
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
882
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
883
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
884
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
885
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
886
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
887
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
888
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
889
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
890
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
891
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
892
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
893
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
894
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
895
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
896
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
897
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
898
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
899
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
900
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
901
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
902
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
903
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
904
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
905
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
906
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
907
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
908
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
909
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
910
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
911
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
912
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
913
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
914
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
915
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
916
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
917
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
918
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
919
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
920
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
921
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
922
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
923
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
924
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
925
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
926
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
927
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
928
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
929
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
930
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
931
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
932
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
933
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
934
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
935
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
936
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
937
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
938
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
939
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
940
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
941
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
942
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
943
      "\xea\x9e\x87"     => "\xea\x9e\x86",
944
      "\xea\x9e\x85"     => "\xea\x9e\x84",
945
      "\xea\x9e\x83"     => "\xea\x9e\x82",
946
      "\xea\x9e\x81"     => "\xea\x9e\x80",
947
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
948
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
949
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
950
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
951
      "\xea\x9d\xad"     => "\xea\x9d\xac",
952
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
953
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
954
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
955
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
956
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
957
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
958
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
959
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
960
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
961
      "\xea\x9d\x99"     => "\xea\x9d\x98",
962
      "\xea\x9d\x97"     => "\xea\x9d\x96",
963
      "\xea\x9d\x95"     => "\xea\x9d\x94",
964
      "\xea\x9d\x93"     => "\xea\x9d\x92",
965
      "\xea\x9d\x91"     => "\xea\x9d\x90",
966
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
967
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
968
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
969
      "\xea\x9d\x89"     => "\xea\x9d\x88",
970
      "\xea\x9d\x87"     => "\xea\x9d\x86",
971
      "\xea\x9d\x85"     => "\xea\x9d\x84",
972
      "\xea\x9d\x83"     => "\xea\x9d\x82",
973
      "\xea\x9d\x81"     => "\xea\x9d\x80",
974
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
975
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
976
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
977
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
978
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
979
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
980
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
981
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
982
      "\xea\x9c\xad"     => "\xea\x9c\xac",
983
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
984
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
985
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
986
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
987
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
988
      "\xea\x9a\x97"     => "\xea\x9a\x96",
989
      "\xea\x9a\x95"     => "\xea\x9a\x94",
990
      "\xea\x9a\x93"     => "\xea\x9a\x92",
991
      "\xea\x9a\x91"     => "\xea\x9a\x90",
992
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
993
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
994
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
995
      "\xea\x9a\x89"     => "\xea\x9a\x88",
996
      "\xea\x9a\x87"     => "\xea\x9a\x86",
997
      "\xea\x9a\x85"     => "\xea\x9a\x84",
998
      "\xea\x9a\x83"     => "\xea\x9a\x82",
999
      "\xea\x9a\x81"     => "\xea\x9a\x80",
1000
      "\xea\x99\xad"     => "\xea\x99\xac",
1001
      "\xea\x99\xab"     => "\xea\x99\xaa",
1002
      "\xea\x99\xa9"     => "\xea\x99\xa8",
1003
      "\xea\x99\xa7"     => "\xea\x99\xa6",
1004
      "\xea\x99\xa5"     => "\xea\x99\xa4",
1005
      "\xea\x99\xa3"     => "\xea\x99\xa2",
1006
      "\xea\x99\x9f"     => "\xea\x99\x9e",
1007
      "\xea\x99\x9d"     => "\xea\x99\x9c",
1008
      "\xea\x99\x9b"     => "\xea\x99\x9a",
1009
      "\xea\x99\x99"     => "\xea\x99\x98",
1010
      "\xea\x99\x97"     => "\xea\x99\x96",
1011
      "\xea\x99\x95"     => "\xea\x99\x94",
1012
      "\xea\x99\x93"     => "\xea\x99\x92",
1013
      "\xea\x99\x91"     => "\xea\x99\x90",
1014
      "\xea\x99\x8f"     => "\xea\x99\x8e",
1015
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1016
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1017
      "\xea\x99\x89"     => "\xea\x99\x88",
1018
      "\xea\x99\x87"     => "\xea\x99\x86",
1019
      "\xea\x99\x85"     => "\xea\x99\x84",
1020
      "\xea\x99\x83"     => "\xea\x99\x82",
1021
      "\xea\x99\x81"     => "\xea\x99\x80",
1022
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1023
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1024
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1025
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1026
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1027
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1028
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1029
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1030
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1031
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1032
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1033
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1034
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1035
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1036
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1037
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1038
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1039
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1040
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1041
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1042
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1043
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1044
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1045
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1046
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1047
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1048
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1049
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1050
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1051
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1052
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1053
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1054
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1055
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1056
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1057
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1058
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1059
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1060
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1061
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1062
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1063
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1064
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1065
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1066
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1067
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1068
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1069
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1070
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1071
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1072
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1073
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1074
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1075
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1076
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1077
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1078
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1079
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1080
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1081
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1082
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1083
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1084
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1085
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1086
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1087
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1088
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1089
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1090
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1091
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1092
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1093
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1094
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1095
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1096
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1097
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1098
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1099
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1100
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1101
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1102
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1103
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1104
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1105
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1106
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1107
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1108
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1109
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1110
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1111
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1112
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1113
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1114
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1115
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1116
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1117
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1118
      "\xe2\xb1\xa5"     => "\xc8\xba",
1119
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1120
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1121
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1122
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1123
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1124
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1125
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1126
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1127
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1128
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1129
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1130
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1131
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1132
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1133
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1134
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1135
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1136
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1137
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1138
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1139
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1140
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1141
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1142
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1143
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1144
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1145
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1146
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1147
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1148
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1149
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1150
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1151
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1152
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1153
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1154
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1155
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1156
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1157
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1158
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1159
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1160
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1161
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1162
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1163
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1164
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1165
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1166
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1167
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1168
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1169
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1170
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1171
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1172
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1173
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1174
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1175
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1176
      "\xe1\xbe\xbe"     => "\xce\x99",
1177
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1178
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1179
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1180
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1181
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1182
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1183
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1184
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1185
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1186
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1187
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1188
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1189
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1190
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1191
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1192
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1193
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1194
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1195
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1196
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1197
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1198
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1199
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1200
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1201
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1202
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1203
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1204
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1205
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1206
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1207
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1208
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1209
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1210
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1211
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1212
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1213
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1214
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1215
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1216
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1217
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1218
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1219
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1220
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1221
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1222
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1223
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1224
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1225
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1226
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1227
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1228
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1229
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1230
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1231
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1232
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1233
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1234
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1235
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1236
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1237
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1238
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1239
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1240
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1241
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1242
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1243
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1244
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1245
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1246
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1247
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1248
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1249
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1250
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1251
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1252
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1253
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1254
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1255
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1256
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1257
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1258
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1259
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1260
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1261
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1262
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1263
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1264
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1265
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1266
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1267
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1268
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1269
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1270
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1271
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1272
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1273
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1274
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1275
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1276
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1277
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1278
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1279
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1280
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1281
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1282
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1283
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1284
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1285
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1286
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1287
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1288
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1289
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1290
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1291
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1292
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1293
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1294
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1295
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1296
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1297
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1298
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1299
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1300
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1301
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1302
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1303
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1304
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1305
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1306
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1307
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1308
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1309
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1310
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1311
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1312
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1313
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1314
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1315
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1316
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1317
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1318
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1319
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1320
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1321
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1322
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1323
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1324
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1325
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1326
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1327
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1328
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1329
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1330
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1331
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1332
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1333
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1334
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1335
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1336
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1337
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1338
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1339
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1340
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1341
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1342
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1343
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1344
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1345
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1346
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1347
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1348
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1349
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1350
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1351
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1352
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1353
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1354
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1355
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1356
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1357
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1358
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1359
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1360
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1361
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1362
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1363
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1364
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1365
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1366
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1367
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1368
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1369
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1370
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1371
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1372
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1373
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1374
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1375
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1376
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1377
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1378
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1379
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1380
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1381
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1382
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1383
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1384
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1385
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1386
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1387
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1388
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1389
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1390
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1391
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1392
      "\xd6\x86"         => "\xd5\x96",
1393
      "\xd6\x85"         => "\xd5\x95",
1394
      "\xd6\x84"         => "\xd5\x94",
1395
      "\xd6\x83"         => "\xd5\x93",
1396
      "\xd6\x82"         => "\xd5\x92",
1397
      "\xd6\x81"         => "\xd5\x91",
1398
      "\xd6\x80"         => "\xd5\x90",
1399
      "\xd5\xbf"         => "\xd5\x8f",
1400
      "\xd5\xbe"         => "\xd5\x8e",
1401
      "\xd5\xbd"         => "\xd5\x8d",
1402
      "\xd5\xbc"         => "\xd5\x8c",
1403
      "\xd5\xbb"         => "\xd5\x8b",
1404
      "\xd5\xba"         => "\xd5\x8a",
1405
      "\xd5\xb9"         => "\xd5\x89",
1406
      "\xd5\xb8"         => "\xd5\x88",
1407
      "\xd5\xb7"         => "\xd5\x87",
1408
      "\xd5\xb6"         => "\xd5\x86",
1409
      "\xd5\xb5"         => "\xd5\x85",
1410
      "\xd5\xb4"         => "\xd5\x84",
1411
      "\xd5\xb3"         => "\xd5\x83",
1412
      "\xd5\xb2"         => "\xd5\x82",
1413
      "\xd5\xb1"         => "\xd5\x81",
1414
      "\xd5\xb0"         => "\xd5\x80",
1415
      "\xd5\xaf"         => "\xd4\xbf",
1416
      "\xd5\xae"         => "\xd4\xbe",
1417
      "\xd5\xad"         => "\xd4\xbd",
1418
      "\xd5\xac"         => "\xd4\xbc",
1419
      "\xd5\xab"         => "\xd4\xbb",
1420
      "\xd5\xaa"         => "\xd4\xba",
1421
      "\xd5\xa9"         => "\xd4\xb9",
1422
      "\xd5\xa8"         => "\xd4\xb8",
1423
      "\xd5\xa7"         => "\xd4\xb7",
1424
      "\xd5\xa6"         => "\xd4\xb6",
1425
      "\xd5\xa5"         => "\xd4\xb5",
1426
      "\xd5\xa4"         => "\xd4\xb4",
1427
      "\xd5\xa3"         => "\xd4\xb3",
1428
      "\xd5\xa2"         => "\xd4\xb2",
1429
      "\xd5\xa1"         => "\xd4\xb1",
1430
      "\xd4\xa5"         => "\xd4\xa4",
1431
      "\xd4\xa3"         => "\xd4\xa2",
1432
      "\xd4\xa1"         => "\xd4\xa0",
1433
      "\xd4\x9f"         => "\xd4\x9e",
1434
      "\xd4\x9d"         => "\xd4\x9c",
1435
      "\xd4\x9b"         => "\xd4\x9a",
1436
      "\xd4\x99"         => "\xd4\x98",
1437
      "\xd4\x97"         => "\xd4\x96",
1438
      "\xd4\x95"         => "\xd4\x94",
1439
      "\xd4\x93"         => "\xd4\x92",
1440
      "\xd4\x91"         => "\xd4\x90",
1441
      "\xd4\x8f"         => "\xd4\x8e",
1442
      "\xd4\x8d"         => "\xd4\x8c",
1443
      "\xd4\x8b"         => "\xd4\x8a",
1444
      "\xd4\x89"         => "\xd4\x88",
1445
      "\xd4\x87"         => "\xd4\x86",
1446
      "\xd4\x85"         => "\xd4\x84",
1447
      "\xd4\x83"         => "\xd4\x82",
1448
      "\xd4\x81"         => "\xd4\x80",
1449
      "\xd3\xbf"         => "\xd3\xbe",
1450
      "\xd3\xbd"         => "\xd3\xbc",
1451
      "\xd3\xbb"         => "\xd3\xba",
1452
      "\xd3\xb9"         => "\xd3\xb8",
1453
      "\xd3\xb7"         => "\xd3\xb6",
1454
      "\xd3\xb5"         => "\xd3\xb4",
1455
      "\xd3\xb3"         => "\xd3\xb2",
1456
      "\xd3\xb1"         => "\xd3\xb0",
1457
      "\xd3\xaf"         => "\xd3\xae",
1458
      "\xd3\xad"         => "\xd3\xac",
1459
      "\xd3\xab"         => "\xd3\xaa",
1460
      "\xd3\xa9"         => "\xd3\xa8",
1461
      "\xd3\xa7"         => "\xd3\xa6",
1462
      "\xd3\xa5"         => "\xd3\xa4",
1463
      "\xd3\xa3"         => "\xd3\xa2",
1464
      "\xd3\xa1"         => "\xd3\xa0",
1465
      "\xd3\x9f"         => "\xd3\x9e",
1466
      "\xd3\x9d"         => "\xd3\x9c",
1467
      "\xd3\x9b"         => "\xd3\x9a",
1468
      "\xd3\x99"         => "\xd3\x98",
1469
      "\xd3\x97"         => "\xd3\x96",
1470
      "\xd3\x95"         => "\xd3\x94",
1471
      "\xd3\x93"         => "\xd3\x92",
1472
      "\xd3\x91"         => "\xd3\x90",
1473
      "\xd3\x8f"         => "\xd3\x80",
1474
      "\xd3\x8e"         => "\xd3\x8d",
1475
      "\xd3\x8c"         => "\xd3\x8b",
1476
      "\xd3\x8a"         => "\xd3\x89",
1477
      "\xd3\x88"         => "\xd3\x87",
1478
      "\xd3\x86"         => "\xd3\x85",
1479
      "\xd3\x84"         => "\xd3\x83",
1480
      "\xd3\x82"         => "\xd3\x81",
1481
      "\xd2\xbf"         => "\xd2\xbe",
1482
      "\xd2\xbd"         => "\xd2\xbc",
1483
      "\xd2\xbb"         => "\xd2\xba",
1484
      "\xd2\xb9"         => "\xd2\xb8",
1485
      "\xd2\xb7"         => "\xd2\xb6",
1486
      "\xd2\xb5"         => "\xd2\xb4",
1487
      "\xd2\xb3"         => "\xd2\xb2",
1488
      "\xd2\xb1"         => "\xd2\xb0",
1489
      "\xd2\xaf"         => "\xd2\xae",
1490
      "\xd2\xad"         => "\xd2\xac",
1491
      "\xd2\xab"         => "\xd2\xaa",
1492
      "\xd2\xa9"         => "\xd2\xa8",
1493
      "\xd2\xa7"         => "\xd2\xa6",
1494
      "\xd2\xa5"         => "\xd2\xa4",
1495
      "\xd2\xa3"         => "\xd2\xa2",
1496
      "\xd2\xa1"         => "\xd2\xa0",
1497
      "\xd2\x9f"         => "\xd2\x9e",
1498
      "\xd2\x9d"         => "\xd2\x9c",
1499
      "\xd2\x9b"         => "\xd2\x9a",
1500
      "\xd2\x99"         => "\xd2\x98",
1501
      "\xd2\x97"         => "\xd2\x96",
1502
      "\xd2\x95"         => "\xd2\x94",
1503
      "\xd2\x93"         => "\xd2\x92",
1504
      "\xd2\x91"         => "\xd2\x90",
1505
      "\xd2\x8f"         => "\xd2\x8e",
1506
      "\xd2\x8d"         => "\xd2\x8c",
1507
      "\xd2\x8b"         => "\xd2\x8a",
1508
      "\xd2\x81"         => "\xd2\x80",
1509
      "\xd1\xbf"         => "\xd1\xbe",
1510
      "\xd1\xbd"         => "\xd1\xbc",
1511
      "\xd1\xbb"         => "\xd1\xba",
1512
      "\xd1\xb9"         => "\xd1\xb8",
1513
      "\xd1\xb7"         => "\xd1\xb6",
1514
      "\xd1\xb5"         => "\xd1\xb4",
1515
      "\xd1\xb3"         => "\xd1\xb2",
1516
      "\xd1\xb1"         => "\xd1\xb0",
1517
      "\xd1\xaf"         => "\xd1\xae",
1518
      "\xd1\xad"         => "\xd1\xac",
1519
      "\xd1\xab"         => "\xd1\xaa",
1520
      "\xd1\xa9"         => "\xd1\xa8",
1521
      "\xd1\xa7"         => "\xd1\xa6",
1522
      "\xd1\xa5"         => "\xd1\xa4",
1523
      "\xd1\xa3"         => "\xd1\xa2",
1524
      "\xd1\xa1"         => "\xd1\xa0",
1525
      "\xd1\x9f"         => "\xd0\x8f",
1526
      "\xd1\x9e"         => "\xd0\x8e",
1527
      "\xd1\x9d"         => "\xd0\x8d",
1528
      "\xd1\x9c"         => "\xd0\x8c",
1529
      "\xd1\x9b"         => "\xd0\x8b",
1530
      "\xd1\x9a"         => "\xd0\x8a",
1531
      "\xd1\x99"         => "\xd0\x89",
1532
      "\xd1\x98"         => "\xd0\x88",
1533
      "\xd1\x97"         => "\xd0\x87",
1534
      "\xd1\x96"         => "\xd0\x86",
1535
      "\xd1\x95"         => "\xd0\x85",
1536
      "\xd1\x94"         => "\xd0\x84",
1537
      "\xd1\x93"         => "\xd0\x83",
1538
      "\xd1\x92"         => "\xd0\x82",
1539
      "\xd1\x91"         => "\xd0\x81",
1540
      "\xd1\x90"         => "\xd0\x80",
1541
      "\xd1\x8f"         => "\xd0\xaf",
1542
      "\xd1\x8e"         => "\xd0\xae",
1543
      "\xd1\x8d"         => "\xd0\xad",
1544
      "\xd1\x8c"         => "\xd0\xac",
1545
      "\xd1\x8b"         => "\xd0\xab",
1546
      "\xd1\x8a"         => "\xd0\xaa",
1547
      "\xd1\x89"         => "\xd0\xa9",
1548
      "\xd1\x88"         => "\xd0\xa8",
1549
      "\xd1\x87"         => "\xd0\xa7",
1550
      "\xd1\x86"         => "\xd0\xa6",
1551
      "\xd1\x85"         => "\xd0\xa5",
1552
      "\xd1\x84"         => "\xd0\xa4",
1553
      "\xd1\x83"         => "\xd0\xa3",
1554
      "\xd1\x82"         => "\xd0\xa2",
1555
      "\xd1\x81"         => "\xd0\xa1",
1556
      "\xd1\x80"         => "\xd0\xa0",
1557
      "\xd0\xbf"         => "\xd0\x9f",
1558
      "\xd0\xbe"         => "\xd0\x9e",
1559
      "\xd0\xbd"         => "\xd0\x9d",
1560
      "\xd0\xbc"         => "\xd0\x9c",
1561
      "\xd0\xbb"         => "\xd0\x9b",
1562
      "\xd0\xba"         => "\xd0\x9a",
1563
      "\xd0\xb9"         => "\xd0\x99",
1564
      "\xd0\xb8"         => "\xd0\x98",
1565
      "\xd0\xb7"         => "\xd0\x97",
1566
      "\xd0\xb6"         => "\xd0\x96",
1567
      "\xd0\xb5"         => "\xd0\x95",
1568
      "\xd0\xb4"         => "\xd0\x94",
1569
      "\xd0\xb3"         => "\xd0\x93",
1570
      "\xd0\xb2"         => "\xd0\x92",
1571
      "\xd0\xb1"         => "\xd0\x91",
1572
      "\xd0\xb0"         => "\xd0\x90",
1573
      "\xcf\xbb"         => "\xcf\xba",
1574
      "\xcf\xb8"         => "\xcf\xb7",
1575
      "\xcf\xb5"         => "\xce\x95",
1576
      "\xcf\xb2"         => "\xcf\xb9",
1577
      "\xcf\xb1"         => "\xce\xa1",
1578
      "\xcf\xb0"         => "\xce\x9a",
1579
      "\xcf\xaf"         => "\xcf\xae",
1580
      "\xcf\xad"         => "\xcf\xac",
1581
      "\xcf\xab"         => "\xcf\xaa",
1582
      "\xcf\xa9"         => "\xcf\xa8",
1583
      "\xcf\xa7"         => "\xcf\xa6",
1584
      "\xcf\xa5"         => "\xcf\xa4",
1585
      "\xcf\xa3"         => "\xcf\xa2",
1586
      "\xcf\xa1"         => "\xcf\xa0",
1587
      "\xcf\x9f"         => "\xcf\x9e",
1588
      "\xcf\x9d"         => "\xcf\x9c",
1589
      "\xcf\x9b"         => "\xcf\x9a",
1590
      "\xcf\x99"         => "\xcf\x98",
1591
      "\xcf\x97"         => "\xcf\x8f",
1592
      "\xcf\x96"         => "\xce\xa0",
1593
      "\xcf\x95"         => "\xce\xa6",
1594
      "\xcf\x91"         => "\xce\x98",
1595
      "\xcf\x90"         => "\xce\x92",
1596
      "\xcf\x8e"         => "\xce\x8f",
1597
      "\xcf\x8d"         => "\xce\x8e",
1598
      "\xcf\x8c"         => "\xce\x8c",
1599
      "\xcf\x8b"         => "\xce\xab",
1600
      "\xcf\x8a"         => "\xce\xaa",
1601
      "\xcf\x89"         => "\xce\xa9",
1602
      "\xcf\x88"         => "\xce\xa8",
1603
      "\xcf\x87"         => "\xce\xa7",
1604
      "\xcf\x86"         => "\xce\xa6",
1605
      "\xcf\x85"         => "\xce\xa5",
1606
      "\xcf\x84"         => "\xce\xa4",
1607
      "\xcf\x83"         => "\xce\xa3",
1608
      "\xcf\x82"         => "\xce\xa3",
1609
      "\xcf\x81"         => "\xce\xa1",
1610
      "\xcf\x80"         => "\xce\xa0",
1611
      "\xce\xbf"         => "\xce\x9f",
1612
      "\xce\xbe"         => "\xce\x9e",
1613
      "\xce\xbd"         => "\xce\x9d",
1614
      "\xce\xbc"         => "\xce\x9c",
1615
      "\xce\xbb"         => "\xce\x9b",
1616
      "\xce\xba"         => "\xce\x9a",
1617
      "\xce\xb9"         => "\xce\x99",
1618
      "\xce\xb8"         => "\xce\x98",
1619
      "\xce\xb7"         => "\xce\x97",
1620
      "\xce\xb6"         => "\xce\x96",
1621
      "\xce\xb5"         => "\xce\x95",
1622
      "\xce\xb4"         => "\xce\x94",
1623
      "\xce\xb3"         => "\xce\x93",
1624
      "\xce\xb2"         => "\xce\x92",
1625
      "\xce\xb1"         => "\xce\x91",
1626
      "\xce\xaf"         => "\xce\x8a",
1627
      "\xce\xae"         => "\xce\x89",
1628
      "\xce\xad"         => "\xce\x88",
1629
      "\xce\xac"         => "\xce\x86",
1630
      "\xcd\xbd"         => "\xcf\xbf",
1631
      "\xcd\xbc"         => "\xcf\xbe",
1632
      "\xcd\xbb"         => "\xcf\xbd",
1633
      "\xcd\xb7"         => "\xcd\xb6",
1634
      "\xcd\xb3"         => "\xcd\xb2",
1635
      "\xcd\xb1"         => "\xcd\xb0",
1636
      "\xca\x92"         => "\xc6\xb7",
1637
      "\xca\x8c"         => "\xc9\x85",
1638
      "\xca\x8b"         => "\xc6\xb2",
1639
      "\xca\x8a"         => "\xc6\xb1",
1640
      "\xca\x89"         => "\xc9\x84",
1641
      "\xca\x88"         => "\xc6\xae",
1642
      "\xca\x83"         => "\xc6\xa9",
1643
      "\xca\x80"         => "\xc6\xa6",
1644
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1645
      "\xc9\xb5"         => "\xc6\x9f",
1646
      "\xc9\xb2"         => "\xc6\x9d",
1647
      "\xc9\xb1"         => "\xe2\xb1\xae",
1648
      "\xc9\xaf"         => "\xc6\x9c",
1649
      "\xc9\xab"         => "\xe2\xb1\xa2",
1650
      "\xc9\xa9"         => "\xc6\x96",
1651
      "\xc9\xa8"         => "\xc6\x97",
1652
      "\xc9\xa5"         => "\xea\x9e\x8d",
1653
      "\xc9\xa3"         => "\xc6\x94",
1654
      "\xc9\xa0"         => "\xc6\x93",
1655
      "\xc9\x9b"         => "\xc6\x90",
1656
      "\xc9\x99"         => "\xc6\x8f",
1657
      "\xc9\x97"         => "\xc6\x8a",
1658
      "\xc9\x96"         => "\xc6\x89",
1659
      "\xc9\x94"         => "\xc6\x86",
1660
      "\xc9\x93"         => "\xc6\x81",
1661
      "\xc9\x92"         => "\xe2\xb1\xb0",
1662
      "\xc9\x91"         => "\xe2\xb1\xad",
1663
      "\xc9\x90"         => "\xe2\xb1\xaf",
1664
      "\xc9\x8f"         => "\xc9\x8e",
1665
      "\xc9\x8d"         => "\xc9\x8c",
1666
      "\xc9\x8b"         => "\xc9\x8a",
1667
      "\xc9\x89"         => "\xc9\x88",
1668
      "\xc9\x87"         => "\xc9\x86",
1669
      "\xc9\x82"         => "\xc9\x81",
1670
      "\xc9\x80"         => "\xe2\xb1\xbf",
1671
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1672
      "\xc8\xbc"         => "\xc8\xbb",
1673
      "\xc8\xb3"         => "\xc8\xb2",
1674
      "\xc8\xb1"         => "\xc8\xb0",
1675
      "\xc8\xaf"         => "\xc8\xae",
1676
      "\xc8\xad"         => "\xc8\xac",
1677
      "\xc8\xab"         => "\xc8\xaa",
1678
      "\xc8\xa9"         => "\xc8\xa8",
1679
      "\xc8\xa7"         => "\xc8\xa6",
1680
      "\xc8\xa5"         => "\xc8\xa4",
1681
      "\xc8\xa3"         => "\xc8\xa2",
1682
      "\xc8\x9f"         => "\xc8\x9e",
1683
      "\xc8\x9d"         => "\xc8\x9c",
1684
      "\xc8\x9b"         => "\xc8\x9a",
1685
      "\xc8\x99"         => "\xc8\x98",
1686
      "\xc8\x97"         => "\xc8\x96",
1687
      "\xc8\x95"         => "\xc8\x94",
1688
      "\xc8\x93"         => "\xc8\x92",
1689
      "\xc8\x91"         => "\xc8\x90",
1690
      "\xc8\x8f"         => "\xc8\x8e",
1691
      "\xc8\x8d"         => "\xc8\x8c",
1692
      "\xc8\x8b"         => "\xc8\x8a",
1693
      "\xc8\x89"         => "\xc8\x88",
1694
      "\xc8\x87"         => "\xc8\x86",
1695
      "\xc8\x85"         => "\xc8\x84",
1696
      "\xc8\x83"         => "\xc8\x82",
1697
      "\xc8\x81"         => "\xc8\x80",
1698
      "\xc7\xbf"         => "\xc7\xbe",
1699
      "\xc7\xbd"         => "\xc7\xbc",
1700
      "\xc7\xbb"         => "\xc7\xba",
1701
      "\xc7\xb9"         => "\xc7\xb8",
1702
      "\xc7\xb5"         => "\xc7\xb4",
1703
      "\xc7\xb3"         => "\xc7\xb2",
1704
      "\xc7\xaf"         => "\xc7\xae",
1705
      "\xc7\xad"         => "\xc7\xac",
1706
      "\xc7\xab"         => "\xc7\xaa",
1707
      "\xc7\xa9"         => "\xc7\xa8",
1708
      "\xc7\xa7"         => "\xc7\xa6",
1709
      "\xc7\xa5"         => "\xc7\xa4",
1710
      "\xc7\xa3"         => "\xc7\xa2",
1711
      "\xc7\xa1"         => "\xc7\xa0",
1712
      "\xc7\x9f"         => "\xc7\x9e",
1713
      "\xc7\x9d"         => "\xc6\x8e",
1714
      "\xc7\x9c"         => "\xc7\x9b",
1715
      "\xc7\x9a"         => "\xc7\x99",
1716
      "\xc7\x98"         => "\xc7\x97",
1717
      "\xc7\x96"         => "\xc7\x95",
1718
      "\xc7\x94"         => "\xc7\x93",
1719
      "\xc7\x92"         => "\xc7\x91",
1720
      "\xc7\x90"         => "\xc7\x8f",
1721
      "\xc7\x8e"         => "\xc7\x8d",
1722
      "\xc7\x8c"         => "\xc7\x8b",
1723
      "\xc7\x89"         => "\xc7\x88",
1724
      "\xc7\x86"         => "\xc7\x85",
1725
      "\xc6\xbf"         => "\xc7\xb7",
1726
      "\xc6\xbd"         => "\xc6\xbc",
1727
      "\xc6\xb9"         => "\xc6\xb8",
1728
      "\xc6\xb6"         => "\xc6\xb5",
1729
      "\xc6\xb4"         => "\xc6\xb3",
1730
      "\xc6\xb0"         => "\xc6\xaf",
1731
      "\xc6\xad"         => "\xc6\xac",
1732
      "\xc6\xa8"         => "\xc6\xa7",
1733
      "\xc6\xa5"         => "\xc6\xa4",
1734
      "\xc6\xa3"         => "\xc6\xa2",
1735
      "\xc6\xa1"         => "\xc6\xa0",
1736
      "\xc6\x9e"         => "\xc8\xa0",
1737
      "\xc6\x9a"         => "\xc8\xbd",
1738
      "\xc6\x99"         => "\xc6\x98",
1739
      "\xc6\x95"         => "\xc7\xb6",
1740
      "\xc6\x92"         => "\xc6\x91",
1741
      "\xc6\x8c"         => "\xc6\x8b",
1742
      "\xc6\x88"         => "\xc6\x87",
1743
      "\xc6\x85"         => "\xc6\x84",
1744
      "\xc6\x83"         => "\xc6\x82",
1745
      "\xc6\x80"         => "\xc9\x83",
1746
      "\xc5\xbf"         => "\x53",
1747
      "\xc5\xbe"         => "\xc5\xbd",
1748
      "\xc5\xbc"         => "\xc5\xbb",
1749
      "\xc5\xba"         => "\xc5\xb9",
1750
      "\xc5\xb7"         => "\xc5\xb6",
1751
      "\xc5\xb5"         => "\xc5\xb4",
1752
      "\xc5\xb3"         => "\xc5\xb2",
1753
      "\xc5\xb1"         => "\xc5\xb0",
1754
      "\xc5\xaf"         => "\xc5\xae",
1755
      "\xc5\xad"         => "\xc5\xac",
1756
      "\xc5\xab"         => "\xc5\xaa",
1757
      "\xc5\xa9"         => "\xc5\xa8",
1758
      "\xc5\xa7"         => "\xc5\xa6",
1759
      "\xc5\xa5"         => "\xc5\xa4",
1760
      "\xc5\xa3"         => "\xc5\xa2",
1761
      "\xc5\xa1"         => "\xc5\xa0",
1762
      "\xc5\x9f"         => "\xc5\x9e",
1763
      "\xc5\x9d"         => "\xc5\x9c",
1764
      "\xc5\x9b"         => "\xc5\x9a",
1765
      "\xc5\x99"         => "\xc5\x98",
1766
      "\xc5\x97"         => "\xc5\x96",
1767
      "\xc5\x95"         => "\xc5\x94",
1768
      "\xc5\x93"         => "\xc5\x92",
1769
      "\xc5\x91"         => "\xc5\x90",
1770
      "\xc5\x8f"         => "\xc5\x8e",
1771
      "\xc5\x8d"         => "\xc5\x8c",
1772
      "\xc5\x8b"         => "\xc5\x8a",
1773
      "\xc5\x88"         => "\xc5\x87",
1774
      "\xc5\x86"         => "\xc5\x85",
1775
      "\xc5\x84"         => "\xc5\x83",
1776
      "\xc5\x82"         => "\xc5\x81",
1777
      "\xc5\x80"         => "\xc4\xbf",
1778
      "\xc4\xbe"         => "\xc4\xbd",
1779
      "\xc4\xbc"         => "\xc4\xbb",
1780
      "\xc4\xba"         => "\xc4\xb9",
1781
      "\xc4\xb7"         => "\xc4\xb6",
1782
      "\xc4\xb5"         => "\xc4\xb4",
1783
      "\xc4\xb3"         => "\xc4\xb2",
1784
      "\xc4\xb1"         => "\x49",
1785
      "\xc4\xaf"         => "\xc4\xae",
1786
      "\xc4\xad"         => "\xc4\xac",
1787
      "\xc4\xab"         => "\xc4\xaa",
1788
      "\xc4\xa9"         => "\xc4\xa8",
1789
      "\xc4\xa7"         => "\xc4\xa6",
1790
      "\xc4\xa5"         => "\xc4\xa4",
1791
      "\xc4\xa3"         => "\xc4\xa2",
1792
      "\xc4\xa1"         => "\xc4\xa0",
1793
      "\xc4\x9f"         => "\xc4\x9e",
1794
      "\xc4\x9d"         => "\xc4\x9c",
1795
      "\xc4\x9b"         => "\xc4\x9a",
1796
      "\xc4\x99"         => "\xc4\x98",
1797
      "\xc4\x97"         => "\xc4\x96",
1798
      "\xc4\x95"         => "\xc4\x94",
1799
      "\xc4\x93"         => "\xc4\x92",
1800
      "\xc4\x91"         => "\xc4\x90",
1801
      "\xc4\x8f"         => "\xc4\x8e",
1802
      "\xc4\x8d"         => "\xc4\x8c",
1803
      "\xc4\x8b"         => "\xc4\x8a",
1804
      "\xc4\x89"         => "\xc4\x88",
1805
      "\xc4\x87"         => "\xc4\x86",
1806
      "\xc4\x85"         => "\xc4\x84",
1807
      "\xc4\x83"         => "\xc4\x82",
1808
      "\xc4\x81"         => "\xc4\x80",
1809
      "\xc3\xbf"         => "\xc5\xb8",
1810
      "\xc3\xbe"         => "\xc3\x9e",
1811
      "\xc3\xbd"         => "\xc3\x9d",
1812
      "\xc3\xbc"         => "\xc3\x9c",
1813
      "\xc3\xbb"         => "\xc3\x9b",
1814
      "\xc3\xba"         => "\xc3\x9a",
1815
      "\xc3\xb9"         => "\xc3\x99",
1816
      "\xc3\xb8"         => "\xc3\x98",
1817
      "\xc3\xb6"         => "\xc3\x96",
1818
      "\xc3\xb5"         => "\xc3\x95",
1819
      "\xc3\xb4"         => "\xc3\x94",
1820
      "\xc3\xb3"         => "\xc3\x93",
1821
      "\xc3\xb2"         => "\xc3\x92",
1822
      "\xc3\xb1"         => "\xc3\x91",
1823
      "\xc3\xb0"         => "\xc3\x90",
1824
      "\xc3\xaf"         => "\xc3\x8f",
1825
      "\xc3\xae"         => "\xc3\x8e",
1826
      "\xc3\xad"         => "\xc3\x8d",
1827
      "\xc3\xac"         => "\xc3\x8c",
1828
      "\xc3\xab"         => "\xc3\x8b",
1829
      "\xc3\xaa"         => "\xc3\x8a",
1830
      "\xc3\xa9"         => "\xc3\x89",
1831
      "\xc3\xa8"         => "\xc3\x88",
1832
      "\xc3\xa7"         => "\xc3\x87",
1833
      "\xc3\xa6"         => "\xc3\x86",
1834
      "\xc3\xa5"         => "\xc3\x85",
1835
      "\xc3\xa4"         => "\xc3\x84",
1836
      "\xc3\xa3"         => "\xc3\x83",
1837
      "\xc3\xa2"         => "\xc3\x82",
1838
      "\xc3\xa1"         => "\xc3\x81",
1839
      "\xc3\xa0"         => "\xc3\x80",
1840
      "\xc2\xb5"         => "\xce\x9c",
1841
      "\x7a"             => "\x5a",
1842
      "\x79"             => "\x59",
1843
      "\x78"             => "\x58",
1844
      "\x77"             => "\x57",
1845
      "\x76"             => "\x56",
1846
      "\x75"             => "\x55",
1847
      "\x74"             => "\x54",
1848
      "\x73"             => "\x53",
1849
      "\x72"             => "\x52",
1850
      "\x71"             => "\x51",
1851
      "\x70"             => "\x50",
1852
      "\x6f"             => "\x4f",
1853
      "\x6e"             => "\x4e",
1854
      "\x6d"             => "\x4d",
1855
      "\x6c"             => "\x4c",
1856
      "\x6b"             => "\x4b",
1857
      "\x6a"             => "\x4a",
1858
      "\x69"             => "\x49",
1859
      "\x68"             => "\x48",
1860
      "\x67"             => "\x47",
1861 157
      "\x66"             => "\x46",
1862
      "\x65"             => "\x45",
1863 157
      "\x64"             => "\x44",
1864
      "\x63"             => "\x43",
1865 1
      "\x62"             => "\x42",
1866 1
      "\x61"             => "\x41",
1867 1
1868 1
    );
1869 1
1870 157
    return $case;
1871
  }
1872
1873
  /**
   * Detects once which UTF-8 related features this server environment offers.
   *
   * The result of every probe is stored in self::$support. The presence of the
   * "mbstring" entry marks the detection as done, so later calls return
   * without probing again.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    if (isset(self::$support['mbstring'])) {
      // detection already ran
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1889
1890 8
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * The input is normalized to an integer before any encoder is chosen:
   * non-integer input is handed to self::hex_to_int(). The same normalized
   * value is then used for both the "IntlChar" path and the HTML-entity
   * fallback, so the result does not depend on which extensions are installed.
   *
   * @param    int $code_point The code point for which to generate a character.
   *
   * @return   string|null Multi-Byte character, returns null on failure to encode
   *                       (input that cannot be converted to a code point, or 0).
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $i = (int)$code_point;

    if ($i !== $code_point) {
      $i = self::hex_to_int($code_point);
    }

    // Explicit replacement for the former loose "!$i" check: a non-integer
    // result means the conversion failed (presumably "false", as reported by
    // the static analyzer for hex_to_int()), and 0 is rejected as the
    // fallback path always did.
    if (!is_int($i) || $i === 0) {
      return null;
    }

    if (self::$support['intlChar'] === true) {
      // pass the normalized integer, not the raw input
      return \IntlChar::chr($i);
    }

    return self::html_entity_decode("&#{$i};", ENT_QUOTES);
  }
1917
1918
  /**
   * Applies callback to all characters of a string.
   *
   * The string is broken up with self::split() and every resulting element is
   * handed to the callback through array_map().
   *
   * @param  string|array $callback The callback function.
   * @param  string       $str      UTF-8 string to run callback on.
   *
   * @return array The outcome of callback.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1932
1933
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character; an empty array for empty input.
   */
  public static function chr_size_list($str)
  {
    // PHP treats the string "0" as falsy, but it is a valid one-character
    // string, so it must not be short-circuited like empty input.
    if (!$str && $str !== '0') {
      return array();
    }

    // strlen() counts bytes, which is exactly the size we want per character
    return array_map('strlen', self::split($str));
  }
1953
1954 1
  /**
   * Get a decimal code representation of a specific character.
   *
   * The UTF-8 byte sequence is decoded by hand: the marker bits of the lead
   * byte decide how many bytes belong to the character. The marker bits are
   * then removed from the lead byte, and each following byte (with its top
   * bit cleared) is appended after shifting the value by six bits.
   *
   * @param   string $char The input character
   *
   * @return  int
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;
    $decimal = self::ord($char[0]);

    // 0xxxxxxx: single byte, the byte value is the result
    if (($decimal & 0x80) === 0) {
      return $decimal;
    }

    // array(mask, marker bits of the lead byte, total number of bytes)
    $layouts = array(
        array(0xe0, 0xc0, 2), // 110xxxxx
        array(0xf0, 0xe0, 3), // 1110xxxx
        array(0xf8, 0xf0, 4), // 11110xxx
    );

    $length = 1;
    foreach ($layouts as $layout) {
      if (($decimal & $layout[0]) === $layout[1]) {
        $length = $layout[2];
        $decimal &= ~$layout[1];
        break;
      }
    }

    // 10xxxxxx: continuation bytes
    for ($offset = 1; $offset < $length; $offset++) {
      $decimal = ($decimal << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $decimal;
  }
1993 1
1994
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The character is converted with self::ord() and the result is formatted
   * by self::int_to_hex() using the given prefix.
   *
   * @param    string $char The input character
   * @param    string $pfix Prefix handed to self::int_to_hex(), "U+" by default.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
2006
2007
  /**
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
   *
   * The chunks come from self::split() and are glued together with implode(),
   * so $end is placed between the chunks and not after the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
2020
2021
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * A byte-level regex keeps runs of well-formed UTF-8 sequences and drops
   * stray bytes. The result is then passed through
   * self::replace_diamond_question_mark() and self::remove_invisible_characters().
   * The optional steps run afterwards in a fixed order: whitespace, msword, BOM.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to pass the string through self::removeBOM()
   * @param bool   $normalize_whitespace    set true, to pass the string through self::normalize_whitespace()
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 holds the valid sequences; groups 2 and 3 match single invalid
    // bytes and are discarded by replacing every match with group 1 only
    $validUtf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    $onlyValidBytes = preg_replace($validUtf8Pattern, '$1', $str);
    $withoutDiamonds = self::replace_diamond_question_mark($onlyValidBytes, '');
    $str = self::remove_invisible_characters($withoutDiamonds);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
2067
2068 3
  /**
   * Cleans up a string so that only printable UTF-8 chars remain + fixes the UTF-8 encoding.
   *
   * The input is cast to string, an empty string is returned right away, and
   * everything else goes through self::fix_simple_utf8() followed by
   * self::clean().
   *
   * @param string $str
   *
   * @return string
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $fixed = self::fix_simple_utf8($str);

    // clean() is called with: remove BOM = true, normalize whitespace = true,
    // normalize msword = false, keep non-breaking-spaces = true
    return (string)self::clean($fixed, true, true, false, true);
  }
2095
2096 3
  /**
   * Accepts a string or a array of strings and returns an array of Unicode code points.
   *
   * A string argument is first broken up with self::split(); an array is used
   * as it is. Every element is then mapped through UTF8::ord() and, if
   * requested, additionally through UTF8::int_to_hex().
   *
   * @param    string|string[] $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool            $u_style If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $points = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $points);
  }
2131
2132
  /**
   * Returns count of characters used in a string.
   *
   * The string is split into chunks of length 1 with self::split() and the
   * occurrences are counted with array_count_values().
   *
   * @param    string $str       The input string.
   * @param    bool   $cleanUtf8 Clean non UTF-8 chars from the string (handed on to self::split()).
   *
   * @return   array An associative array of Character as keys and
   *           their count as values.
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
2145
2146
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity ("&#x...;") and decoded
   * to UTF-8 with mb_convert_encoding().
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2163 1
2164 1
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call it on a UTF-8 string without messing the string up.
   *
   * Flow: a target of "UTF-8" is handled by self::to_utf8(), a target of "ISO-8859-1" by self::to_win1252(),
   * everything else by mb_convert_encoding() from the detected encoding. The input string is returned
   * unchanged when it or the encoding is empty, when no encoding could be detected, when (without $force)
   * the detected encoding already equals the target, or when the conversion yields an empty result.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
   *                         otherwise we auto-detect the current string-encoding
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if ($str === '' || $encoding === '') {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // nothing detected -> nothing we can convert from
    if (!$encodingDetected) {
      return $str;
    }

    // not forced and already in the requested encoding
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    self::checkForSupport();

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_win1252($str);
    }

    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // fall back to the untouched input if the conversion produced nothing usable
    return $strEncoded ? $strEncoded : $str;
  }
2238
2239
  /**
   * Callback function for preg_replace_callback use.
   *
   * Decodes the complete match from HTML entities to UTF-8. A result that is
   * a plain single quote is handed back as the entity "&#x27;" instead.
   *
   * @internal used for "UTF8::html_entity_decode()"
   *
   * @param  array $matches PREG matches
   *
   * @return string
   */
  protected static function html_entity_decode_callback($matches)
  {
    self::checkForSupport();

    $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    return $decoded === "'" ? '&#x27;' : $decoded;
  }
2260
2261
  /**
2262
   * Reads entire file into a string.
2263 2
   *
2264 2
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
2265
   *
2266 2
   * @link http://php.net/manual/en/function.file-get-contents.php
2267 2
   *
2268
   * @param string        $filename      <p>
2269 2
   *                                     Name of the file to read.
2270 1
   *                                     </p>
2271 1
   * @param int|null      $flags         [optional] <p>
2272 2
   *                                     Prior to PHP 6, this parameter is called
2273
   *                                     use_include_path and is a bool.
2274
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
2275
   *                                     to trigger include path
2276 2
   *                                     search.
2277
   *                                     </p>
2278
   *                                     <p>
2279
   *                                     The value of flags can be any combination of
2280 2
   *                                     the following flags (with some restrictions), joined with the
2281 2
   *                                     binary OR (|)
2282
   *                                     operator.
2283 2
   *                                     </p>
2284
   *                                     <p>
2285 2
   *                                     <table>
2286 1
   *                                     Available flags
2287 1
   *                                     <tr valign="top">
2288 1
   *                                     <td>Flag</td>
2289 1
   *                                     <td>Description</td>
2290 1
   *                                     </tr>
2291 1
   *                                     <tr valign="top">
2292
   *                                     <td>
2293 2
   *                                     FILE_USE_INCLUDE_PATH
2294 2
   *                                     </td>
2295 2
   *                                     <td>
2296 2
   *                                     Search for filename in the include directory.
2297
   *                                     See include_path for more
2298
   *                                     information.
2299 2
   *                                     </td>
2300
   *                                     </tr>
2301
   *                                     <tr valign="top">
2302
   *                                     <td>
2303
   *                                     FILE_TEXT
2304
   *                                     </td>
2305
   *                                     <td>
2306
   *                                     As of PHP 6, the default encoding of the read
2307
   *                                     data is UTF-8. You can specify a different encoding by creating a
2308
   *                                     custom context or by changing the default using
2309 1
   *                                     stream_default_encoding. This flag cannot be
2310
   *                                     used with FILE_BINARY.
2311 1
   *                                     </td>
2312
   *                                     </tr>
2313
   *                                     <tr valign="top">
2314
   *                                     <td>
2315
   *                                     FILE_BINARY
2316
   *                                     </td>
2317
   *                                     <td>
2318
   *                                     With this flag, the file is read in binary mode. This is the default
2319
   *                                     setting and cannot be used with FILE_TEXT.
2320
   *                                     </td>
2321
   *                                     </tr>
2322
   *                                     </table>
2323 7
   *                                     </p>
2324
   * @param resource|null $context       [optional] <p>
2325 7
   *                                     A valid context resource created with
2326 7
   *                                     stream_context_create. If you don't need to use a
2327 2
   *                                     custom context, you can skip this parameter by passing null.
2328
   *                                     </p>
2329 1
   * @param int|null      $offset        [optional] <p>
2330 2
   *                                     The offset where the reading starts.
2331 2
   *                                     </p>
2332 7
   * @param int|null      $maxlen        [optional] <p>
2333 1
   *                                     Maximum length of data read. The default is to read until end
2334 1
   *                                     of file is reached.
2335 1
   *                                     </p>
2336 1
   * @param int           $timeout
2337 7
   *
2338 7
   * @param boolean       $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used
2339
   *                                     non default utf-8 chars
2340
   *
2341
   * @return string|false The read data (converted to UTF-8 when $convertToUtf8 is true), or false on failure.
2342 7
   */
2343 7
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);
    $timeout = (int)$timeout;

    // only build a default context (http timeout) if the caller did not pass one
    if ($context === null && $timeout !== 0) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    // the native function is only given a length when one was passed as integer
    $data = is_int($maxlen)
        ? file_get_contents($filename, $flags, $context, $offset, $maxlen)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      // raw data, e.g. for binary files
      return $data;
    }

    self::checkForSupport();

    // convert to UTF-8 (not forced) and return the cleaned string
    return self::cleanup(self::encode('UTF-8', $data, false));
  }
2381
2382
  /**
   * Checks if a file starts with a BOM (Byte Order Mark).
   *
   * The whole file is read and the decision is delegated to UTF8::string_has_bom().
   *
   * @param    string $file_path Path to a readable file.
   *
   * @return   bool True if string_has_bom() reports a BOM for the file content,
   *                false otherwise (including when the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // A failed read returns false; do not hand that boolean to string_has_bom().
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
2393
2394
  /**
   * Normalizes a value to the requested Unicode normalization form (NFC by default).
   *
   * Arrays and objects are walked recursively, strings are normalized, every other
   * type is returned untouched. Strings that \Normalizer cannot normalize are passed
   * through UTF8::encode('UTF-8', ...) instead.
   *
   * @param mixed  $var                The value to filter.
   * @param int    $normalization_form Normalization form handed to \Normalizer (4 = NFC).
   * @param string $leading_combining  Prefix added to a string that starts with a combining mark.
   *
   * @return mixed The filtered value, same kind of value as the input.
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    // Workaround https://bugs.php.net/65732
    if (strpos($var, "\r") !== false) {
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure ASCII strings need no normalization at all.
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Any non-empty marker: it only tells the check below that normalization succeeded.
      $normalized = '-';
    } else {
      $normalized = \Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // Normalization produced nothing usable, so re-encode the input instead.
        $var = self::encode('UTF-8', $var);
      }
    }

    // Prevent leading combining chars for NFC-safe concatenations.
    if (
        $var[0] >= "\x80"
        &&
        isset($normalized[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
2446
2447
  /**
   * Wrapper around the native filter_input() whose result is passed through UTF8::filter().
   *
   * @param int    $type   One of the INPUT_* constants accepted by filter_input().
   * @param string $var    Name of the variable to fetch.
   * @param int    $filter ID of the filter to apply.
   * @param mixed  $option Options or flags for the filter; only forwarded when it was actually passed.
   *
   * @return mixed The result of filter_input() after UTF8::filter().
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only if the caller supplied a fourth argument.
    $raw = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($raw);
  }
2467
2468
  /**
   * Wrapper around the native filter_input_array() whose result is passed through UTF8::filter().
   *
   * @param int   $type       One of the INPUT_* constants accepted by filter_input_array().
   * @param mixed $definition Filter definition; only forwarded when at least two arguments were passed.
   * @param bool  $add_empty  Forwarded together with $definition.
   *
   * @return mixed The result of filter_input_array() after UTF8::filter().
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults for definition / add_empty apply.
    $raw = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($raw);
  }
2487
2488
  /**
   * Wrapper around the native filter_var() whose result is passed through UTF8::filter().
   *
   * @param mixed $var    Value to filter.
   * @param int   $filter ID of the filter to apply.
   * @param mixed $option Options or flags for the filter; only forwarded when it was actually passed.
   *
   * @return mixed The result of filter_var() after UTF8::filter().
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only if the caller supplied a third argument.
    $raw = func_num_args() < 3
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($raw);
  }
2507 1
2508 1
  /**
   * Wrapper around the native filter_var_array() whose result is passed through UTF8::filter().
   *
   * @param array $data       Data to filter.
   * @param mixed $definition Filter definition; only forwarded when at least two arguments were passed.
   * @param bool  $add_empty  Forwarded together with $definition.
   *
   * @return mixed The result of filter_var_array() after UTF8::filter().
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults for definition / add_empty apply.
    $raw = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($raw);
  }
2527 1
2528 1
  /**
   * Checks that a string is not longer than the given number of characters.
   *
   * The length is measured with UTF8::strlen().
   *
   * @param    string $str      The string to be checked.
   * @param    int    $box_size The maximum allowed length.
   *
   * @return   bool true if the length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2540 1
2541 1
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * Every key of self::$brokenUtf8ToUtf8 found in the string is replaced by its value.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * @param string $str The input; it is cast to string first.
   *
   * @return string The string after replacement, or '' for empty input.
   */
  public static function fix_simple_utf8($str)
  {
    // The search / replace lists are derived once per request and then reused.
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2568
2569
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * The value is run through UTF8::utf8_decode() and UTF8::to_utf8() repeatedly
   * until a pass no longer changes it.
   *
   * @param string|string[] $str You can use a string or an array of strings.
   *
   * @return mixed The fixed string, or an array with the same keys holding the fixed values.
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      $fixed = array();

      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Starting from '' means an empty string is returned without a single pass.
    $previous = '';
    while ($str !== $previous) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous));
    }

    return $str;
  }
2597
2598
  /**
   * Get the text direction of a specific character.
   *
   * When self::$support['intlChar'] is true, \IntlChar::charDirection() is asked first;
   * if its answer is in neither of the known lists (or intl is unavailable), the
   * built-in code point table below decides.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // from "IntlChar"-Class
      $ltrClasses = array(0, 11, 12, 20);
      $rtlClasses = array(1, 13, 14, 15, 21);

      if (in_array($direction, $ltrClasses, true)) {
        return 'LTR';
      }

      if (in_array($direction, $rtlClasses, true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of the span covered by the table is treated as left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Single right-to-left code points (matched strictly).
    $rtlPoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    if (in_array($c, $rtlPoints, true)) {
      return 'RTL';
    }

    // Inclusive right-to-left ranges: array(first, last).
    $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2716 1
2717 1
  /**
   * Load the data returned by the file "/data/<file>.php" next to this class.
   *
   * @param string $file File name without directory and without the ".php" extension.
   *
   * @return bool|string|array|int Whatever the required file returns; false when the file does not exist.
   */
  protected static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2734
2735
  /**
   * Creates a random string of characters.
   *
   * WARNING: This method does not create a hash of something, maybe it will be renamed in future.
   * The characters are picked with mt_rand(), so the result must not be used for anything
   * security related.
   *
   * @param    int $len The length of string in characters; it is cast to int, values <= 0 give ''.
   *
   * @return   string String consisting of random characters.
   *
   * @deprecated
   */
  public static function hash($len = 8)
  {
    // The alphabet is built once and reused by later calls.
    static $chars = array();
    static $chars_len = null;

    // Work with a whole number: a fractional length (e.g. 2.5) must not be counted
    // down past zero without ever hitting it.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (!$chars) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // Blank out everything that is not a number or an upper / lower case letter.
        $chars = (array)preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // Remove the blanked entries. The comparison is explicit because a bare
        // array_filter() would also throw away the digit "0".
        $chars = array_values(
            array_filter(
                $chars,
                function ($chr) {
                  return (string)$chr !== '';
                }
            )
        );
      }

      // Plain ASCII alphabet: used without PCRE UTF-8 support or when the list above ended up empty.
      if (!$chars) {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for ($i = 0; $i < $len; ++$i) {
      $hash .= $chars[mt_rand(0, $chars_len - 1)];
    }

    return $hash;
  }
2786
2787
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+"
   * (matched case-insensitively).
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int|false The code point, or false on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits are allowed: with [a-z] an input like "zzzz" would match
    // and intval() would silently turn it into 0 instead of signalling a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2804 3
2805 3
  /**
   * Alias for "UTF8::html_entity_decode()"; all arguments are forwarded unchanged.
   *
   * @param string $str      The input string.
   * @param int    $flags    Flags for the decoding, see UTF8::html_entity_decode().
   * @param string $encoding Encoding to use.
   *
   * @return string The decoded string.
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2818 15
2819
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * Uses mb_encode_numericentity() when the mbstring function exists, otherwise every
   * character returned by UTF8::split() is encoded with UTF8::single_chr_html_encode().
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param  string $str            The Unicode string to be encoded as numbered entities.
   * @param  bool   $keepAsciiChars Keep ASCII chars (only code points from 0x80 upwards are encoded).
   * @param  string $encoding       Encoding of $str; normalized via UTF8::normalizeEncoding() on the mbstring path.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // First code point that gets encoded.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      $encoding = self::normalizeEncoding($encoding);

      // convmap = array(first code point, last code point, offset, mask).
      // The range has to reach U+10FFFF: with an upper bound of 0xffff every character
      // outside of the Basic Multilingual Plane (e.g. emoji) would be left unencoded.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff),
          $encoding
      );
    }

    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
2858
2859
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so the missing semicolons are added here before decoding.
   * Decoding is repeated until the string no longer changes.
   *
   * INFO: opposite to UTF8::html_encode()
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      The input string; it is cast to string first.
   * @param int    $flags    [optional] A bitmask of the following flags, which specify how to handle
   *                         quotes and which document type to use:
   *                         - ENT_COMPAT:   convert double-quotes and leave single-quotes alone
   *                         - ENT_QUOTES:   convert both double and single quotes
   *                         - ENT_NOQUOTES: leave both double and single quotes unconverted
   *                         - ENT_HTML401:  handle code as HTML 4.01
   *                         - ENT_XML1:     handle code as XML 1
   *                         - ENT_XHTML:    handle code as XHTML
   *                         - ENT_HTML5:    handle code as HTML 5
   *                         When null, ENT_COMPAT | ENT_HTML5 is used if Bootup::is_php('5.4')
   *                         returns true, otherwise ENT_COMPAT.
   * @param string $encoding [optional] Encoding to use.
   *
   * @return string the decoded string.
   */
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Without an ampersand there cannot be any entity to decode.
    if (false === strpos($str, '&')) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", array('\voku\helper\UTF8', 'html_entity_decode_callback'), $str);

      // Append the semicolon to numeric entities that were written without one ...
      $terminated = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      // ... and decode numeric & UTF16 two byte entities.
      $str = html_entity_decode($terminated, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2968
2969
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * After the native htmlentities() call, and only when the normalized encoding is 'UTF-8',
   * every character whose entry in UTF8::chr_size_list() is 3 or more is additionally
   * replaced by the result of UTF8::html_encode().
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           The input string.
   * @param int    $flags         [optional] A bitmask of the following flags, which specify how to handle
   *                              quotes, invalid code unit sequences and the used document type:
   *                              - ENT_COMPAT:     convert double-quotes and leave single-quotes alone
   *                              - ENT_QUOTES:     convert both double and single quotes
   *                              - ENT_NOQUOTES:   leave both double and single quotes unconverted
   *                              - ENT_IGNORE:     silently discard invalid code unit sequences instead of
   *                                                returning an empty string (discouraged, may have security
   *                                                implications)
   *                              - ENT_SUBSTITUTE: replace invalid code unit sequences with the Unicode
   *                                                Replacement Character U+FFFD instead of returning an
   *                                                empty string
   *                              - ENT_DISALLOWED: replace code points that are invalid for the given document
   *                                                type with U+FFFD instead of leaving them as is
   *                              - ENT_HTML401:    handle code as HTML 4.01
   *                              - ENT_XML1:       handle code as XML 1
   *                              - ENT_XHTML:      handle code as XHTML
   *                              - ENT_HTML5:      handle code as HTML 5
   * @param string $encoding      [optional] Encoding used in the conversion; it is normalized with
   *                              UTF8::normalizeEncoding() first.
   * @param bool   $double_encode [optional] When turned off, existing html entities are not encoded again.
   *                              The default is to convert everything.
   *
   * @return string The encoded string. If the input contains an invalid code unit sequence within the
   *                given encoding, the native function returns an empty string unless either the
   *                ENT_IGNORE or ENT_SUBSTITUTE flag is set.
   */
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $encoding = self::normalizeEncoding($encoding);

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // The extra pass below only applies to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    $byteLengths = self::chr_size_list($str);
    $search = array();
    $replacements = array();
    foreach ($byteLengths as $counter => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $counter);

      // access() may signal a failure with false; such a value must neither become
      // an array key nor be handed to html_encode().
      if ($char === false || $char === '') {
        continue;
      }

      // Encode every distinct character only once.
      if (!isset($replacements[$char])) {
        $search[$char] = $char;
        $replacements[$char] = self::html_encode($char);
      }
    }

    return str_replace($search, $replacements, $str);
  }
3098
3099
  /**
3100
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3101
   *
3102
   * INFO: Take a look at "UTF8::htmlentities()"
3103 1
   *
3104
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3105 1
   *
3106
   * @param string $str           <p>
3107
   *                              The string being converted.
3108
   *                              </p>
3109
   * @param int    $flags         [optional] <p>
3110
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3111
   *                              invalid code unit sequences and the used document type. The default is
3112
   *                              ENT_COMPAT | ENT_HTML401.
3113
   *                              <table>
3114
   *                              Available <i>flags</i> constants
3115 1
   *                              <tr valign="top">
3116
   *                              <td>Constant Name</td>
3117 1
   *                              <td>Description</td>
3118
   *                              </tr>
3119
   *                              <tr valign="top">
3120
   *                              <td><b>ENT_COMPAT</b></td>
3121
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3122
   *                              </tr>
3123
   *                              <tr valign="top">
3124
   *                              <td><b>ENT_QUOTES</b></td>
3125
   *                              <td>Will convert both double and single quotes.</td>
3126
   *                              </tr>
3127 1
   *                              <tr valign="top">
3128
   *                              <td><b>ENT_NOQUOTES</b></td>
3129 1
   *                              <td>Will leave both double and single quotes unconverted.</td>
3130
   *                              </tr>
3131
   *                              <tr valign="top">
3132
   *                              <td><b>ENT_IGNORE</b></td>
3133
   *                              <td>
3134
   *                              Silently discard invalid code unit sequences instead of returning
3135
   *                              an empty string. Using this flag is discouraged as it
3136
   *                              may have security implications.
3137
   *                              </td>
3138
   *                              </tr>
3139
   *                              <tr valign="top">
3140
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3141
   *                              <td>
3142
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3143
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
3144
   *                              </td>
3145
   *                              </tr>
3146
   *                              <tr valign="top">
3147
   *                              <td><b>ENT_DISALLOWED</b></td>
3148
   *                              <td>
3149
   *                              Replace invalid code points for the given document type with a
3150
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3151
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3152
   *                              instance, to ensure the well-formedness of XML documents with
3153
   *                              embedded external content.
3154
   *                              </td>
3155
   *                              </tr>
3156
   *                              <tr valign="top">
3157
   *                              <td><b>ENT_HTML401</b></td>
3158
   *                              <td>
3159
   *                              Handle code as HTML 4.01.
3160
   *                              </td>
3161
   *                              </tr>
3162
   *                              <tr valign="top">
3163
   *                              <td><b>ENT_XML1</b></td>
3164
   *                              <td>
3165
   *                              Handle code as XML 1.
3166
   *                              </td>
3167
   *                              </tr>
3168
   *                              <tr valign="top">
3169
   *                              <td><b>ENT_XHTML</b></td>
3170
   *                              <td>
3171
   *                              Handle code as XHTML.
3172
   *                              </td>
3173
   *                              </tr>
3174
   *                              <tr valign="top">
3175
   *                              <td><b>ENT_HTML5</b></td>
3176
   *                              <td>
3177
   *                              Handle code as HTML 5.
3178
   *                              </td>
3179 16
   *                              </tr>
3180
   *                              </table>
3181 16
   *                              </p>
3182
   * @param string $encoding      [optional] <p>
3183
   *                              Defines encoding used in conversion.
3184
   *                              </p>
3185
   *                              <p>
3186
   *                              For the purposes of this function, the encodings
3187
   *                              ISO-8859-1, ISO-8859-15,
3188
   *                              UTF-8, cp866,
3189
   *                              cp1251, cp1252, and
3190
   *                              KOI8-R are effectively equivalent, provided the
3191
   *                              <i>string</i> itself is valid for the encoding, as
3192 4
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3193
   *                              the same positions in all of these encodings.
3194 4
   *                              </p>
3195
   * @param bool   $double_encode [optional] <p>
3196
   *                              When <i>double_encode</i> is turned off PHP will not
3197
   *                              encode existing html entities, the default is to convert everything.
3198
   *                              </p>
3199
   *
3200
   * @return string The converted string.
3201
   * </p>
3202
   * <p>
3203
   * If the input <i>string</i> contains an invalid code unit
3204 1
   * sequence within the given <i>encoding</i> an empty string
3205
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3206 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
3207
   */
3208 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Delegate to PHP's htmlspecialchars() after normalizing the encoding name.
    return htmlspecialchars($str, $flags, self::normalizeEncoding($encoding), $double_encode);
  }
3214
3215 1
  /**
3216
   * checks whether iconv is available on the server
3217
   *
3218
   * @return   bool True if available, False otherwise
3219
   */
3220
  public static function iconv_loaded()
  {
    // Report whether the "iconv" extension is loaded, as a plain boolean.
    return (bool)extension_loaded('iconv');
  }
3224
3225
  /**
3226 4
   * Converts Integer to hexadecimal U+xxxx code point representation.
3227
   *
3228
   * INFO: opposite to UTF8::hex_to_int()
3229 4
   *
3230
   * @param    int    $int The integer to be converted to hexadecimal code point.
3231
   * @param    string $pfix
3232 4
   *
3233
   * @return   string The code point, or empty string on failure.
3234 4
   */
3235 4
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain run of decimal digits yields the empty string.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad with zeros so that at least four hex digits follow the prefix.
    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
3247
3248
  /**
3249
   * checks whether intl is available on the server
3250
   *
3251
   * @return   bool True if available, False otherwise
3252
   */
3253
  public static function intl_loaded()
  {
    // Report whether the "intl" extension is loaded, as a plain boolean.
    return (bool)extension_loaded('intl');
  }
3257
3258
  /**
3259
   * checks whether intl-char is available on the server
3260
   *
3261
   * @return   bool True if available, False otherwise
3262
   */
3263
  public static function intlChar_loaded()
  {
    // The class lookup only happens when Bootup::is_php('7.0') reports true.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar');
  }
3267
3268
  /**
3269
   * alias for "UTF8::is_ascii()"
3270
   *
3271
   * @param string $str
3272
   *
3273 2
   * @return boolean
3274
   */
3275 2
  public static function isAscii($str)
  {
    // Camel-case alias; the check itself lives in is_ascii().
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
3279
3280
  /**
3281
   * alias for "UTF8::is_base64()"
3282
   *
3283
   * @param string $str
3284
   *
3285 2
   * @return bool
3286
   */
3287 2
  public static function isBase64($str)
  {
    // Camel-case alias; the check itself lives in is_base64().
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
3291 2
3292 2
  /**
3293 2
   * alias for "UTF8::is_binary()"
3294 2
   *
3295 2
   * @param string $str
3296 2
   *
3297 2
   * @return bool
3298 2
   */
3299 1
  public static function isBinary($str)
  {
    // Camel-case alias; the check itself lives in is_binary().
    $isBinary = self::is_binary($str);

    return $isBinary;
  }
3303 2
3304
  /**
3305 2
   * alias for "UTF8::is_bom()"
3306 2
   *
3307 2
   * @param string $utf8_chr
3308 2
   *
3309 2
   * @return boolean
3310 2
   */
3311 2
  public static function isBom($utf8_chr)
  {
    // Camel-case alias; the check itself lives in is_bom().
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
3315 1
3316 2
  /**
3317 2
   * alias for "UTF8::is_json()"
3318 2
   *
3319
   * @param string $str
3320 2
   *
3321 1
   * @return bool
3322 1
   */
3323
  public static function isJson($str)
  {
    // Camel-case alias; the check itself lives in is_json().
    $isJson = self::is_json($str);

    return $isJson;
  }
3327
3328 2
  /**
3329
   * alias for "UTF8::is_html()"
3330 2
   *
3331
   * @param string $str
3332
   *
3333
   * @return boolean
3334
   */
3335
  public static function isHtml($str)
  {
    // Camel-case alias; the check itself lives in is_html().
    $isHtml = self::is_html($str);

    return $isHtml;
  }
3339
3340 2
  /**
3341
   * alias for "UTF8::is_utf8()"
3342 2
   *
3343 2
   * @param string $str
3344
   *
3345 2
   * @return bool
3346 2
   */
3347 2
  public static function isUtf8($str)
  {
    // Camel-case alias; the check itself lives in is_utf8().
    $isUtf8 = self::is_utf8($str);

    return $isUtf8;
  }
3351 2
3352 2
  /**
3353 2
   * alias for "UTF8::is_utf16()"
3354
   *
3355
   * @param string $str
3356 2
   *
3357 2
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3358 2
   */
3359
  public static function isUtf16($str)
  {
    // Camel-case alias; the detection itself lives in is_utf16().
    $utf16Kind = self::is_utf16($str);

    return $utf16Kind;
  }
3363 1
3364 1
  /**
3365 1
   * alias for "UTF8::is_utf32()"
3366 1
   *
3367 1
   * @param string $str
3368 1
   *
3369
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3370
   */
3371 1
  public static function isUtf32($str)
  {
    // Camel-case alias; the detection itself lives in is_utf32().
    $utf32Kind = self::is_utf32($str);

    return $utf32Kind;
  }
3375 2
3376
  /**
3377
   * Checks if a string is 7 bit ASCII.
3378
   *
3379
   * @param    string $str The string to check.
3380
   *
3381
   * @return   bool <strong>true</strong> if it is ASCII<br />
3382
   *                <strong>false</strong> otherwise
3383 2
   */
3384
  public static function is_ascii($str)
  {
    // Look for any byte in the 0x80-0xFF range; the string is ASCII when none is found.
    $highByteFound = preg_match('/[\x80-\xFF]/', $str);

    return !$highByteFound;
  }
3388
3389
  /**
3390
   * Returns true if the string is base64 encoded, false otherwise.
3391
   *
3392
   * @param string $str
3393
   *
3394
   * @return bool Whether or not $str is base64 encoded
3395
   */
3396
  public static function is_base64($str)
  {
    $str = (string)$str;

    // An empty string is never reported as base64.
    if ($str === '') {
      return false;
    }

    // Strictly decode, encode again, and require an exact round trip.
    return base64_encode(base64_decode($str, true)) === $str;
  }
3410
3411
  /**
3412
   * Check if the input is binary... (this looks like a hack).
3413
   *
3414
   * @param mixed $input
3415 32
   *
3416
   * @return bool
3417 32
   */
3418 32
  public static function is_binary($input)
  {
    // A non-empty string made up solely of "0" and "1" is treated as a bit string.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Otherwise the input counts as binary as soon as it contains a NUL byte.
    //
    // A former third test compared substr_count($input, '^ -~') / strlen($input)
    // against 0.3. substr_count() looks for that literal four-byte substring, so
    // the ratio can never exceed 0.25; the test was always false and is omitted.
    return substr_count($input, "\x00") > 0;
  }
3435 28
3436 30
  /**
3437
   * Check if the file is binary.
3438 13
   *
3439 13
   * @param string $file
3440 13
   *
3441 13
   * @return boolean
3442 23
   */
3443
  public static function is_binary_file($file)
  {
    // Falls back to an empty block when the file cannot be opened or read.
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // fopen() reports failure by returning false rather than throwing,
      // so the handle is checked before it is read from or closed.
      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $data = fread($fp, 512);
        fclose($fp);

        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // Kept for setups where an error handler turns warnings into exceptions.
      $block = '';
    }

    return self::is_binary($block);
  }
3455
3456
  /**
3457 3
   * Checks if the given string is equal to any "Byte Order Mark".
3458 3
   *
3459 3
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3460 3
   *
3461 7
   * @param    string $str The input string.
3462
   *
3463 3
   * @return   bool True if $str is a Byte Order Mark, False otherwise.
3464 3
   */
3465 3
  public static function is_bom($str)
  {
    // The keys of self::$bom are the BOM byte strings; $str must be strictly
    // identical to one of them.
    return in_array($str, array_keys(self::$bom), true);
  }
3475
3476 30
  /**
3477
   * Try to check if "$str" is a JSON string.
3478 28
   *
3479 28
   * @param string $str
3480 28
   *
3481 28
   * @return bool
3482
   */
3483
  public static function is_json($str)
  {
    $str = (string)$str;

    // An empty string is never reported as JSON.
    if ($str === '') {
      return false;
    }

    // Decode first so that json_last_error() refers to this decode attempt.
    $decoded = self::json_decode($str);

    // Only input that decodes to an object without an error is accepted.
    return is_object($decoded) && json_last_error() === JSON_ERROR_NONE;
  }
3501 5
3502
  /**
3503
   * Check if the string contains any html-tags <lall>.
3504 28
   *
3505 28
   * @param string $str
3506 28
   *
3507 28
   * @return boolean
3508 28
   */
3509
  public static function is_html($str)
  {
    $str = (string)$str;

    // An empty string cannot contain a tag.
    if ($str === '') {
      return false;
    }

    // Search for one opening, closing or self-closing tag, with optional attributes.
    $found = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str, $found);

    return count($found) != 0;
  }
3528
3529
  /**
3530
   * Check if the string is UTF-16.
3531
   *
3532
   * @param string $str
3533
   *
3534
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3535
   */
3536 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // Score both byte orders the same way, little-endian first.
    $scores = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $hits = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if ($decoded) {
        // Round trip: the decoded text must survive re-encoding and decoding again.
        $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
        $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);

        if ($roundTrip === $decoded) {
          $inputChars = self::count_chars($str, true);
          // One hit per key of the round-trip tally that in_array() finds
          // (strictly) in the tally of the input.
          foreach (self::count_chars($roundTrip, true) as $char => $unused) {
            if (in_array($char, $inputChars, true) === true) {
              $hits++;
            }
          }
        }
      }

      $scores[$candidate] = $hits;
    }

    // Equal scores are inconclusive.
    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    // 1 = little-endian scored higher, 2 = big-endian scored higher.
    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
3585
3586
  /**
3587
   * Check if the string is UTF-32.
3588
   *
3589
   * @param string $str
3590
   *
3591
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3592
   */
3593 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // Score both byte orders the same way, little-endian first.
    $scores = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $hits = 0;

      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if ($decoded) {
        // Round trip: the decoded text must survive re-encoding and decoding again.
        $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
        $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);

        if ($roundTrip === $decoded) {
          $inputChars = self::count_chars($str, true);
          // One hit per key of the round-trip tally that in_array() finds
          // (strictly) in the tally of the input.
          foreach (self::count_chars($roundTrip, true) as $char => $unused) {
            if (in_array($char, $inputChars, true) === true) {
              $hits++;
            }
          }
        }
      }

      $scores[$candidate] = $hits;
    }

    // Equal scores are inconclusive.
    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    // 1 = little-endian scored higher, 2 = big-endian scored higher.
    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
3642 24
3643
  /**
3644 24
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3645
   *
3646 24
   * @see    http://hsivonen.iki.fi/php-utf8/
3647 2
   *
3648
   * @param    string $str The string to be checked.
3649
   *
3650 23
   * @return   bool
3651
   */
3652 23
  public static function is_utf8($str)
  {
    $str = (string)$str;

    // The empty string is trivially valid.
    if (!isset($str[0])) {
      return true;
    }

    // Every input is validated octet by octet, so the result does not depend
    // on PCRE's UTF-8 mode. The earlier "/u" regex shortcut was only taken
    // when self::pcre_utf8_support() was not true, which looks inverted, and
    // has been dropped.

    $mState = 0; // continuation octets still expected for the current sequence
    $mUcs4 = 0; // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the lead octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // Lead octet of a 2-octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // Lead octet of a 3-octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // Lead octet of a 4-octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // Lead octet of a 5-octet sequence. Always illegal (non-shortest
          // form or beyond U+10FFFF); instead of resynchronizing, the sequence
          // is consumed and rejected by the length check further down.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // Lead octet of a 6-octet sequence; handled like the 5-octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal lead octet.
          return false;
        }

        continue;
      }

      // Inside a sequence only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      // Merge the six payload bits into the code point.
      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // Sequence complete: check for illegal sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset the cache for the next character ($mState is already 0).
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A lead octet whose continuation octets never arrived leaves $mState
    // above zero; such a truncated string is not valid UTF-8.
    return $mState === 0;
  }
3776
3777
  /**
3778
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3779
   * Decodes a JSON string
3780
   *
3781
   * @link http://php.net/manual/en/function.json-decode.php
3782
   *
3783
   * @param string $json    <p>
3784
   *                        The <i>json</i> string being decoded.
3785
   *                        </p>
3786 8
   *                        <p>
3787
   *                        This function only works with UTF-8 encoded strings.
3788 8
   *                        </p>
3789 8
   *                        <p>PHP implements a superset of
3790
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3791 8
   *                        only supports these values when they are nested inside an array or an object.
3792
   *                        </p>
3793 8
   * @param bool   $assoc   [optional] <p>
3794
   *                        When <b>TRUE</b>, returned objects will be converted into
3795 2
   *                        associative arrays.
3796
   *                        </p>
3797 2
   * @param int    $depth   [optional] <p>
3798
   *                        User specified recursion depth.
3799 1
   *                        </p>
3800 1
   * @param int    $options [optional] <p>
3801
   *                        Bitmask of JSON decode options. Currently only
3802 2
   *                        <b>JSON_BIGINT_AS_STRING</b>
3803 2
   *                        is supported (default is to cast large integers as floats)
3804
   *                        </p>
3805 8
   *
3806 8
   * @return mixed the value encoded in <i>json</i> in appropriate
3807 1
   * PHP type. Values true, false and
3808 1
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3809
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3810 8
   * <i>json</i> cannot be decoded or if the encoded
3811 8
   * data is deeper than the recursion limit.
3812
   */
3813 8
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $json = self::filter($json);

    // $options is only forwarded when Bootup::is_php('5.4') reports true.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($json, $assoc, $depth);
    }

    return json_decode($json, $assoc, $depth, $options);
  }
3825
3826 1
  /**
3827
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3828 1
   * Returns the JSON representation of a value
3829 1
   *
3830
   * @link http://php.net/manual/en/function.json-encode.php
3831
   *
3832
   * @param mixed $value   <p>
3833
   *                       The <i>value</i> being encoded. Can be any type except
3834
   *                       a resource.
3835
   *                       </p>
3836
   *                       <p>
3837
   *                       All string data must be UTF-8 encoded.
3838
   *                       </p>
3839
   *                       <p>PHP implements a superset of
3840
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3841
   *                       only supports these values when they are nested inside an array or an object.
3842 1
   *                       </p>
3843
   * @param int   $options [optional] <p>
3844 1
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3845
   *                       <b>JSON_HEX_TAG</b>,
3846
   *                       <b>JSON_HEX_AMP</b>,
3847
   *                       <b>JSON_HEX_APOS</b>,
3848
   *                       <b>JSON_NUMERIC_CHECK</b>,
3849
   *                       <b>JSON_PRETTY_PRINT</b>,
3850
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3851
   *                       <b>JSON_FORCE_OBJECT</b>,
3852
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3853
   *                       constants is described on
3854
   *                       the JSON constants page.
3855 15
   *                       </p>
3856
   * @param int   $depth   [optional] <p>
3857 15
   *                       Set the maximum depth. Must be greater than zero.
3858 2
   *                       </p>
3859
   *
3860
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3861 14
   */
3862 14
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is passed through self::filter() before it is encoded.
    $filtered = self::filter($value);

    // The "$depth" argument is only forwarded when Bootup reports PHP 5.5.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3874
3875
  /**
3876 10
   * Makes string's first char lowercase.
3877
   *
3878
   * @param    string $str The input string
3879
   *
3880
   * @return   string The resulting string
3881
   */
3882
  public static function lcfirst($str)
  {
    // self::substr() may return false instead of a string, while
    // self::strtolower() only accepts strings, so both slices are checked
    // before they are used.
    $firstChar = self::substr($str, 0, 1);
    if ($firstChar === false) {
      // nothing to lowercase: hand the input back as a string
      return (string)$str;
    }

    $rest = self::substr($str, 1);
    if ($rest === false) {
      $rest = '';
    }

    return self::strtolower($firstChar) . $rest;
  }
3886
3887
  /**
3888
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3889
   *
3890
   * WARNING: This is much slower than "ltrim()" !!!!
3891
   *
3892
   * @param    string $str   The string to be trimmed
3893
   * @param    string $chars Optional characters to be stripped
3894
   *
3895
   * @return   string The string with unwanted characters stripped from the left
3896
   */
3897 1 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $subject = (string)$str;

    // nothing to trim
    if ($subject === '') {
      return '';
    }

    // INF is the "no custom characters" marker: fall back to regex whitespace,
    // otherwise let self::rxClass() build the character-class fragment.
    if (INF === $chars) {
      $charClass = '\s';
    } else {
      $charClass = self::rxClass($chars);
    }

    return preg_replace('/^' . $charClass . '+/u', '', $subject);
  }
3909
3910
  /**
3911
   * Returns the UTF-8 character with the maximum code point in the given data.
3912 33
   *
3913
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3914
   *
3915 33
   * @return   string The character with the highest code point in the given data.
3916
   */
3917 View Code Duplication
  public static function max($arg)
  {
    // an array of strings is glued together into one string first
    $data = is_array($arg) ? implode('', $arg) : $arg;

    // highest code point found in the data, turned back into a character
    $highest = max(self::codepoints($data));

    return self::chr($highest);
  }
3925
3926 1
  /**
3927
   * Calculates and returns the maximum number of bytes taken by any
3928 1
   * UTF-8 encoded character in the given string.
3929 1
   *
3930
   * @param    string $str The original Unicode string.
3931
   *
3932 1
   * @return   int The largest byte length of any single character, or 0 if the string has no characters.
3933
   */
3934 1
  public static function max_chr_width($str)
  {
    // list of per-character sizes as reported by self::chr_size_list()
    $widths = self::chr_size_list($str);

    // no entries: report a width of zero
    if (count($widths) === 0) {
      return 0;
    }

    return (int)max($widths);
  }
3943
3944 1
  /**
3945
   * checks whether mbstring is available on the server
3946 1
   *
3947
   * @return   bool True if available, False otherwise
3948
   */
3949 1
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    // side effect: mbstring is switched to UTF-8 whenever it is available
    \mb_internal_encoding('UTF-8');

    return true;
  }
3959 1
3960 1
  /**
3961 1
   * Returns the UTF-8 character with the minimum code point in the given data.
3962 1
   *
3963
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3964
   *
3965
   * @return   string The character with the lowest code point in the given data.
3966
   */
3967 View Code Duplication
  public static function min($arg)
  {
    // an array of strings is glued together into one string first
    $data = is_array($arg) ? implode('', $arg) : $arg;

    // lowest code point found in the data, turned back into a character
    $lowest = min(self::codepoints($data));

    return self::chr($lowest);
  }
3975 7
3976
  /**
3977 7
   * Normalize the encoding-name input.
3978
   *
3979
   * @param string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
3980 7
   *
3981 2
   * @return string|false e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.; false if $encoding is empty.
3982 2
   */
3983 7
  public static function normalizeEncoding($encoding)
  {
    // memo of already normalized names, keyed by the spelling that was passed in
    static $resolvedNames = array();

    // an empty / falsy name cannot be normalized
    if (!$encoding) {
      return false;
    }

    // returned untouched: plain "UTF-8" or a name listed in self::$iconvEncoding
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($resolvedNames[$encoding])) {
      return $resolvedNames[$encoding];
    }

    // alias (upper-cased, punctuation removed) => name returned for it
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $upperName = strtoupper($encoding);

    // lookup key: only letters, digits and whitespace survive
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperName);

    // unknown aliases fall back to the upper-cased input
    if (!empty($aliases[$lookupKey])) {
      $normalized = $aliases[$lookupKey];
    } else {
      $normalized = $upperName;
    }

    $resolvedNames[$encoding] = $normalized;

    return $normalized;
  }
4032 1
4033 1
  /**
4034 1
   * Normalize MS Word special characters.
4035
   *
4036 1
   * @param string $str The string to be normalized.
4037
   *
4038
   * @return string
4039
   */
4040
  public static function normalize_msword($str)
  {
    // search / replacement lists are derived from self::$utf8MSWord only once
    static $lookup = null;

    if ($lookup === null) {
      $lookup = array(
          'search'  => array_keys(self::$utf8MSWord),
          'replace' => array_values(self::$utf8MSWord),
      );
    }

    return str_replace($lookup['search'], $lookup['replace'], $str);
  }
4052 36
4053
  /**
4054
   * Normalize the whitespace.
4055 36
   *
4056
   * @param string $str                     The string to be normalized.
4057
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
4058
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
4059 36
   *
4060 36
   * @return string
4061 36
   */
4062 36
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // per-variant whitespace lists (slot 0: replace NBSP too, slot 1: keep NBSP)
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // The NBSP is only kept for a strict boolean true, so the cache slot has to
    // be derived from that very same test. An (int) cast would let a truthy
    // non-boolean (e.g. 1) fill slot 1 with the full table, which a later call
    // with true would then wrongly reuse.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($whitespaces[$cacheKey])) {

      $whitespaces[$cacheKey] = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($whitespaces[$cacheKey]['NO-BREAK SPACE']);
      }

      // only the characters themselves are needed for str_replace()
      $whitespaces[$cacheKey] = array_values($whitespaces[$cacheKey]);
    }

    // bidirectional control characters are removed unless explicitly kept
    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    // every remaining listed whitespace character becomes a plain space
    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
4091 36
4092
  /**
4093 36
   * Format a number with grouped thousands.
4094
   *
4095
   * @param float  $number
4096
   * @param int    $decimals
4097
   * @param string $dec_point
4098
   * @param string $thousands_sep
4099
   *
4100
   * @return string
4101
   */
4102
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // Before PHP 5.4 the native number_format() only used the first byte of
    // each separator, so a separator longer than one byte has to be injected
    // afterwards. From 5.4 on the native function handles it directly.
    // NOTE(review): assumes Bootup::is_php('5.4') is true on PHP >= 5.4.
    $hasMultiByteSeparator = isset($thousands_sep[1]) || isset($dec_point[1]);

    if ($hasMultiByteSeparator && Bootup::is_php('5.4') !== true) {
      // strtr() swaps both placeholders in one pass, so a "." or "," that is
      // part of one separator is never replaced a second time.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
4127 40
4128
  /**
4129 40
   * Calculates Unicode code point of the given UTF-8 encoded character.
4130
   *
4131 40
   * @param    string $s The character of which to calculate code point.
4132
   *
4133 40
   * @return   int Unicode code point of the given character,<br />
4134 30
   *           0 on invalid UTF-8 byte sequence.
4135
   */
4136
  public static function ord($s)
  {
    // empty input yields 0; the string "0" is a real character and passes
    if (!$s && $s !== '0') {
      return 0;
    }

    // init
    self::checkForSupport();

    // preferred path: IntlChar, as long as it yields a truthy code point
    if (self::$support['intlChar'] === true) {
      $codePoint = \IntlChar::ord($s);
      if ($codePoint) {
        return $codePoint;
      }
    }

    // manual decoding of at most four bytes; unpack() indexes them from 1
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // four-byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three-byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two-byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte (or a lead byte without enough followers)
    return $lead;
  }
4169
4170
  /**
4171
   * Parses the string into variables.
4172
   *
4173
   * WARNING: This differs from parse_str() by returning the results
4174
   *    instead of placing them in the local scope!
4175
   *
4176
   * @link http://php.net/manual/en/function.parse-str.php
4177
   *
4178
   * @param string $str     <p>
4179
   *                        The input string.
4180
   *                        </p>
4181 2
   * @param array  $result  <p>
4182
   *                        If the second parameter arr is present,
4183 2
   *                        variables are stored in this variable as array elements instead.
4184 1
   *                        </p>
4185
   *
4186
   * @return void
4187 2
   */
4188
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    // the filtered input is parsed straight into the by-reference $result
    \mb_parse_str(self::filter($str), $result);
  }
4197
4198
  /**
4199 25
   * Checks if the "u" pattern modifier is available, which enables Unicode (UTF-8) support in PCRE.
4200
   *
4201 25
   * @return   bool True if support is available, false otherwise
4202
   */
4203 25
  public static function pcre_utf8_support()
  {
    // An empty pattern with the "u" modifier matches the empty string (1) when
    // the modifier is usable; the error of an unusable modifier is silenced.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return $matched === 1;
  }
4208 24
4209 24
  /**
4210 24
   * Create an array containing a range of UTF-8 characters.
4211
   *
4212 24
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
4213
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
4214 24
   *
4215
   * @return   array
4216
   */
4217
  public static function range($var1, $var2)
  {
    // falsy boundaries (including 0 and "0") produce an empty range
    if (!$var1 || !$var2) {
      return array();
    }

    $start = self::rangeBoundaryToCodePoint($var1);
    if (!$start) {
      return array();
    }

    $end = self::rangeBoundaryToCodePoint($var2);
    if (!$end) {
      return array();
    }

    // every code point between both boundaries is converted back to a character
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Turns one boundary of UTF8::range() into a code point.
   *
   * Decimal digits are taken as the code point itself, hexadecimal digits are
   * converted via self::hex_to_int(), anything else is treated as a character
   * and resolved via self::ord().
   *
   * @param mixed $boundary Numeric or hexadecimal code point, or a UTF-8 character.
   *
   * @return int The code point as returned by the matching conversion.
   */
  private static function rangeBoundaryToCodePoint($boundary)
  {
    // ctype_*() functions are only reliable for strings, so test a string copy
    $asString = (string)$boundary;

    if (ctype_digit($asString)) {
      return (int)$boundary;
    }

    if (ctype_xdigit($asString)) {
      return (int)self::hex_to_int($boundary);
    }

    return self::ord($boundary);
  }
4255
4256 24
  /**
4257 5
   * alias for "UTF8::removeBOM()"
4258
   *
4259 5
   * @param string $str
4260 5
   *
4261
   * @return string
4262 24
   */
4263
  public static function remove_bom($str)
  {
    // pure alias: all work is done by self::removeBOM()
    $withoutBom = self::removeBOM($str);

    return $withoutBom;
  }
4267
4268
  /**
4269
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
4270
   *
4271
   * @param string $str
4272
   *
4273
   * @return string
4274
   */
4275
  public static function removeBOM($str)
  {
    // self::$bom is iterated as "BOM byte string => number of bytes to cut"
    foreach (self::$bom as $marker => $markerLength) {
      // only a marker at the very beginning of the string counts
      if (strpos($str, $marker) !== 0) {
        continue;
      }

      $str = substr($str, $markerLength);
    }

    return $str;
  }
4285 2
4286 1
  /**
4287 2
   * Removes duplicate occurrences of a string in another string.
4288 1
   *
4289 2
   * @param    string       $str  The base string
4290
   * @param    string|array $what String to search for in the base string
4291 2
   *
4292
   * @return   string The result string with removed duplicates
4293
   */
4294 2
  public static function remove_duplicates($str, $what = ' ')
  {
    // a single needle is handled like a list with one entry
    if (is_string($what)) {
      $what = array($what);
    }

    // anything that is neither a string nor an array is ignored
    if (is_array($what)) {
      foreach ($what as $item) {
        // The replacement is the captured needle ("$1"), not the needle text
        // itself: preg_replace() interprets "$n" / "\n" inside a replacement
        // as back-references, so a needle such as "$0" would otherwise be
        // replaced by the whole match and its duplicates would survive.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4308
4309
  /**
4310 3
   * Remove Invisible Characters
4311 3
   *
4312 3
   * This prevents sandwiching null characters
4313 3
   * between ascii characters, like Java\0script.
4314 3
   *
4315 3
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4316 3
   *
4317 3
   * @param  string $str
4318
   * @param  bool   $url_encoded
4319
   * @param  string $replacement
4320 3
   *
4321 3
   * @return  string
4322 3
   */
4323 3
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Hex digits of a percent-encoded byte may be written in either case
      // ("%0b" and "%0B" are the same byte), hence the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until a pass changes nothing: removing one sequence can join the
    // surrounding text into a new removable sequence.
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4343
4344
  /**
4345
   * replace diamond question mark (�)
4346
   *
4347
   * @param string $str
4348
   * @param string $unknown
4349
   *
4350
   * @return string
4351
   */
4352
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // both spellings of the replacement character that are searched for
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // each needle gets the same substitute
    $substitutes = array(
        $unknown,
        $unknown,
    );

    return str_replace($needles, $substitutes, $str);
  }
4366 13
4367 13
  /**
4368
   * Strip whitespace or other characters from end of a UTF-8 string.
4369 13
   *
4370
   * WARNING: This is much slower than "rtrim()" !!!!
4371
   *
4372
   * @param    string $str   The string to be trimmed
4373
   * @param    string $chars Optional characters to be stripped
4374
   *
4375
   * @return   string The string with unwanted characters stripped from the right
4376
   */
4377 View Code Duplication
  public static function rtrim($str = '', $chars = INF)
  {
    $subject = (string)$str;

    // nothing to trim
    if ($subject === '') {
      return '';
    }

    // INF is the "no custom characters" marker: fall back to regex whitespace,
    // otherwise let self::rxClass() build the character-class fragment.
    if (INF === $chars) {
      $charClass = '\s';
    } else {
      $charClass = self::rxClass($chars);
    }

    return preg_replace('/' . $charClass . '+$/u', '', $subject);
  }
4389 1
4390
  /**
4391
   * rxClass
4392
   *
4393 1
   * @param string $s
4394 1
   * @param string $class
4395
   *
4396
   * @return string
4397 1
   */
4398 1
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $memoKey = $s . $class;
    if (isset($rxClassCache[$memoKey])) {
      return $rxClassCache[$memoKey];
    }

    // content of the "[...]" character class, seeded with the given $class
    $bracketContent = $class;
    // chunks that cannot live inside the character class
    $alternatives = array();

    foreach (self::str_split($s) as $chunk) {
      // a dash goes to the very front of the class content
      if ('-' === $chunk) {
        $bracketContent = '-' . $bracketContent;
        continue;
      }

      // chunks of one or two bytes are regex-quoted
      if (!isset($chunk[2])) {
        $bracketContent .= preg_quote($chunk, '/');
        continue;
      }

      // a longer chunk that is still one character is appended as-is
      if (1 === self::strlen($chunk)) {
        $bracketContent .= $chunk;
        continue;
      }

      $alternatives[] = $chunk;
    }

    $bracket = '[' . $bracketContent . ']';

    if (count($alternatives) === 0) {
      $pattern = $bracket;
    } else {
      // the character class comes first, followed by the extra alternatives
      $pattern = '(?:' . implode('|', array_merge(array($bracket), $alternatives)) . ')';
    }

    $rxClassCache[$memoKey] = $pattern;

    return $pattern;
  }
4435 2
4436
  /**
4437 2
   * Echo native UTF8-Support libs, e.g. for debugging.
4438 2
   */
4439 2
  public static function showSupport()
  {
    // one output line per entry of self::$support
    foreach (self::$support as $supportFlag) {
      echo $supportFlag, "\n<br>";
    }
  }
4445 2
4446 2
  /**
4447 2
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4448 2
   *
4449 2
   * @param    string $char           The Unicode character to be encoded as numbered entity.
4450
   * @param    bool   $keepAsciiChars Keep ASCII chars.
4451 2
   *
4452
   * @return   string The HTML numbered entity.
4453
   */
4454 2
  public static function single_chr_html_encode($char, $keepAsciiChars = false)
  {
    // Empty input yields an empty string. The character "0" is falsy in PHP
    // but is a real character, so it must not be rejected here (same guard
    // as in self::ord()).
    if (!$char && $char !== '0') {
      return '';
    }

    // ASCII characters are returned untouched when requested
    if (
        $keepAsciiChars === true
        &&
        self::isAscii($char) === true
    ) {
      return $char;
    }

    return '&#' . self::ord($char) . ';';
  }
4470
4471
  /**
4472
   * Convert a string to an array of Unicode characters.
4473
   *
4474
   * @param    string  $str       The string to split into array.
4475 1
   * @param    int     $length    Max character length of each array element.
4476
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string.
4477 1
   *
4478
   * @return   array An array containing chunks of the string.
4479 1
   */
4480
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $input = (string)$str;

    if ($input === '') {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $input = self::clean($input);
      }

      // "." with the "u" and "s" modifiers: one match per character, newlines included
      preg_match_all('/./us', $input, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and group them by the lead-byte bit pattern;
      // a lead byte without valid continuation bytes adds nothing

      $byteCount = strlen($input);
      $pos = 0;

      while ($pos < $byteCount) {
        $lead = $input[$pos];

        if (($lead & "\x80") === "\x00") {
          // single byte
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($input[$pos + 1])) {
          // two bytes
          if (($input[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $input[$pos + 1];

            $pos++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($input[$pos + 2])) {
          // three bytes
          if ((($input[$pos + 1] & "\xC0") === "\x80") && (($input[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $input[$pos + 1] . $input[$pos + 2];

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($input[$pos + 3])) {
          // four bytes
          if ((($input[$pos + 1] & "\xC0") === "\x80") && (($input[$pos + 2] & "\xC0") === "\x80") && (($input[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $input[$pos + 1] . $input[$pos + 2] . $input[$pos + 3];

            $pos += 3;
          }
        }

        $pos++;
      }
    }

    // regroup the single characters into chunks of $length characters
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4550 1
4551 1
  /**
4552 1
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4553 1
   *
4554
   * @param string $str
4555
   *
4556 1
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
4557
   *                      otherwise it will return false.
4558
   */
4559
  public static function str_detect_encoding($str)
  {
    //
    // 1.) binary strings (010001001...) like UTF-16 / UTF-32
    //     (result 1 is reported as little endian, 2 as big endian)
    //

    if (self::is_binary($str)) {
      if (self::is_utf16($str) === 1) {
        return 'UTF-16LE';
      }
      if (self::is_utf16($str) === 2) {
        return 'UTF-16BE';
      }
      if (self::is_utf32($str) === 1) {
        return 'UTF-32LE';
      }
      if (self::is_utf32($str) === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    self::checkForSupport();

    $candidates = array(
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
        'UTF-8',
    );

    $detected = \mb_detect_encoding($str, $candidates, true);
    if ($detected) {
      return $detected;
    }

    //
    // 5.) check via "iconv()": an encoding is accepted when converting the
    //     string from that encoding to itself leaves it unchanged
    //

    $fingerprint = md5($str);
    foreach (self::$iconvEncoding as $candidate) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @iconv($candidate, $candidate, $str);

      if (md5($roundTrip) === $fingerprint) {
        return $candidate;
      }
    }

    return false;
  }
4628
4629 1
  /**
4630 1
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
4631 1
   *
4632
   * @link  http://php.net/manual/en/function.str-ireplace.php
4633 1
   *
4634
   * @param mixed $search  <p>
4635
   *                       Every replacement with search array is
4636
   *                       performed on the result of previous replacement.
4637
   *                       </p>
4638
   * @param mixed $replace <p>
4639
   *                       </p>
4640
   * @param mixed $subject <p>
4641
   *                       If subject is an array, then the search and
4642
   *                       replace is performed with every entry of
4643
   *                       subject, and the return value is an array as
4644
   *                       well.
4645
   *                       </p>
4646
   * @param int   $count   [optional] <p>
4647
   *                       The number of matched and replaced needles will
4648
   *                       be returned in count which is passed by
4649
   *                       reference.
4650
   *                       </p>
4651
   *
4652
   * @return mixed a string or an array of replacements.
4653
   * @since 5.0
4654 8
   */
4655
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Case-insensitive, UTF-8 aware counterpart of str_replace(): every needle is
    // converted into a quoted, caseless unicode regex and applied via preg_replace().
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // an empty needle must never match; this pattern is unsatisfiable
        // ("start of subject" preceded by a character)
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // str_replace() semantics: the replacement is literal text. Escape "\" and "$"
    // so that preg_replace() does not expand "$1" / "\1" as back-references.
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $value) {
        $replacement[] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    // use a dedicated variable for the counter instead of recycling $replace
    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
4673 6
4674
  /**
4675
   * Limit the number of characters in a string, but also after the next word.
4676 4
   *
4677
   * @param  string $str
4678
   * @param  int    $length
4679 4
   * @param  string $strAddOn
4680 4
   *
4681 4
   * @return string
4682
   */
4683 4
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    // Shortens $str to at most $length characters, cutting at a space where
    // possible, and appends $strAddOn whenever something was cut off.
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $length = (int)$length;
    if (self::strlen($str) <= $length) {
      // short enough already: returned untouched, without the add-on
      return $str;
    }

    $lastIndex = $length - 1;

    // the limit falls exactly on a space: cut right before it
    if (self::substr($str, $lastIndex, 1) === ' ') {
      return self::substr($str, 0, $lastIndex) . $strAddOn;
    }

    // otherwise drop the (possibly partial) last word of the truncated text
    $str = self::substr($str, 0, $length);
    $words = explode(' ', $str);
    array_pop($words);
    $kept = implode(' ', $words);

    if ($kept !== '') {
      return $kept . $strAddOn;
    }

    // there was only a single word: hard cut inside it
    return self::substr($str, 0, $lastIndex) . $strAddOn;
  }
4714
4715 4
  /**
4716
   * Pad a UTF-8 string to given length with another string.
4717
   *
4718
   * @param    string $str        The input string
4719
   * @param    int    $pad_length The length of return string
4720 4
   * @param    string $pad_string String to use for padding the input string
4721
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
4722
   *
4723
   * @return   string Returns the padded string
4724
   */
4725 4
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    // Pads $str with $pad_string until it is $pad_length characters long (lengths
    // are measured with self::strlen(), i.e. in characters, not bytes).
    // $str is returned unchanged when $pad_length is not a positive integer, when
    // it is smaller than the current length, or when $pad_string is empty.
    $str_length = self::strlen($str);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with; also avoids the division by zero below
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = self::substr(str_repeat($pad_string, (int)ceil($diff / $ps_length)), 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets floor(diff / 2) characters, the right side ceil(diff / 2);
        // the cast is applied to the result of the division, not to $diff
        $repeated = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $pre = self::substr($repeated, 0, (int)floor($diff / 2));
        $post = self::substr($repeated, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $pre = '';
        $post = self::substr(str_repeat($pad_string, (int)ceil($diff / $ps_length)), 0, $diff);
    }

    return $pre . $str . $post;
  }
4760 1
4761 1
  /**
4762
   * Repeat a string.
4763 1
   *
4764
   * @param string $str        <p>
4765
   *                           The string to be repeated.
4766
   *                           </p>
4767 1
   * @param int    $multiplier <p>
4768
   *                           Number of time the input string should be
4769
   *                           repeated.
4770
   *                           </p>
4771
   *                           <p>
4772
   *                           multiplier has to be greater than or equal to 0.
4773
   *                           If the multiplier is set to 0, the function
4774
   *                           will return an empty string.
4775
   *                           </p>
4776 1
   *
4777
   * @return string the repeated string.
4778
   */
4779 1
  public static function str_repeat($str, $multiplier)
  {
    // run the input through self::filter() first, then delegate to the native function
    $filtered = self::filter($str);
    $repeated = str_repeat($filtered, $multiplier);

    return $repeated;
  }
4785
4786
  /**
4787
   * INFO: this is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe
4788
   *
4789
   * (PHP 4, PHP 5)<br/>
4790 8
   * Replace all occurrences of the search string with the replacement string
4791
   *
4792 8
   * @link http://php.net/manual/en/function.str-replace.php
4793
   *
4794
   * @param mixed $search  <p>
4795
   *                       The value being searched for, otherwise known as the needle.
4796
   *                       An array may be used to designate multiple needles.
4797
   *                       </p>
4798
   * @param mixed $replace <p>
4799
   *                       The replacement value that replaces found search
4800
   *                       values. An array may be used to designate multiple replacements.
4801
   *                       </p>
4802
   * @param mixed $subject <p>
4803
   *                       The string or array being searched and replaced on,
4804
   *                       otherwise known as the haystack.
4805 8
   *                       </p>
4806
   *                       <p>
4807 8
   *                       If subject is an array, then the search and
4808 5
   *                       replace is performed with every entry of
4809 5
   *                       subject, and the return value is an array as
4810 8
   *                       well.
4811
   *                       </p>
4812
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4813
   *
4814
   * @return mixed This function returns a string or an array with the replaced values.
4815
   */
4816
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain pass-through to the native function; $count is filled by reference
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4820
4821
  /**
4822
   * Shuffles all the characters in the string.
4823 5
   *
4824
   * @param    string $str The input string
4825 5
   *
4826
   * @return   string The shuffled string.
4827
   */
4828
  public static function str_shuffle($str)
  {
    // split into characters, randomise their order, glue them back together
    $characters = self::split($str);
    shuffle($characters);
    $shuffled = implode('', $characters);

    return $shuffled;
  }
4836 5
4837 5
  /**
4838
   * Sort all characters according to code points.
4839
   *
4840
   * @param    string $str    A UTF-8 string.
4841
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
4842
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
4843
   *
4844
   * @return   string String of sorted characters
4845
   */
4846
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values into a single entry
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      // ascending code point order
      asort($codePoints);

      return self::string($codePoints);
    }

    // descending code point order
    arsort($codePoints);

    return self::string($codePoints);
  }
4862
4863
  /**
4864
   * Convert a string to an array.
4865
   *
4866
   * @param string $str
4867
   * @param int    $len
4868
   *
4869
   * @return array
4870 1
   */
4871
  public static function str_split($str, $len = 1)
  {
    self::checkForSupport();

    $len = (int)$len;
    if ($len < 1) {
      // hand invalid lengths to the native function so it reports them as usual
      return str_split($str, $len);
    }

    // step 1: cut the string into single grapheme clusters
    if (self::$support['intl'] !== true) {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $graphemes = $matches[0];
    } else {
      $graphemes = array();
      $bytePos = 0;
      $byteLength = strlen($str);
      while ($bytePos < $byteLength) {
        // the 5th argument receives the byte offset of the next cluster
        $graphemes[] = \grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $bytePos, $bytePos);
      }
    }

    if ($len === 1) {
      return $graphemes;
    }

    // step 2: glue every $len consecutive clusters into one chunk
    $chunks = array();
    $chunkIndex = -1;
    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
4911
4912
  /**
4913
   * Convert binary into an string.
4914
   *
4915
   * @param $bin 1|0
4916
   *
4917
   * @return string
4918
   */
4919
  public static function binary_to_str($bin)
  {
    // Converts a string of "0"/"1" digits back into raw bytes, eight bits per byte.
    // The conversion is done byte-wise: going through base_convert() loses
    // precision on long inputs and yields an odd number of hex digits for bytes
    // below 0x10, which pack('H*') then decodes into the wrong byte.
    $bits = (string)$bin;

    if ($bits === '') {
      return '';
    }

    // leading zero bits may have been stripped: left-pad to whole bytes
    $remainder = strlen($bits) % 8;
    if ($remainder !== 0) {
      $bits = str_repeat('0', 8 - $remainder) . $bits;
    }

    $bytes = '';
    foreach (str_split($bits, 8) as $octet) {
      $bytes .= chr(bindec($octet));
    }

    return $bytes;
  }
4923
4924
  /**
4925 8
   * Get a binary representation of a specific string.
4926
   *
4927 8
   * @param   string $str The input string.
4928 8
   *
4929
   * @return  string
4930 8
   */
4931 2
  public static function str_to_binary($str)
  {
    // Returns the bytes of $str as one binary number written with "0"/"1" digits.
    // Each byte is converted on its own; base_convert() on the whole hex dump
    // loses precision as soon as the string is longer than a few bytes.
    $str = (string)$str;

    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; ++$i) {
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    // keep the historic output format: no leading zeros, "0" for an all-zero/empty input
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4939 1
4940 1
  /**
4941
   * US-ASCII transliterations of Unicode text.
4942
   *
4943 7
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
4944 1
   * Warning: you should only pass this well formed UTF-8!
4945 1
   * Be aware it works by making a copy of the input string which it appends transliterated
4946
   * characters to - it uses a PHP output buffer to do this - it means, memory use will increase,
4947 7
   * requiring up to the same amount again as the input string
4948
   *
4949
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
4950
   *
4951
   * @author <[email protected]>
4952
   *
4953
   * @param string $str     UTF-8 string to convert
4954
   * @param string $unknown Character use if character unknown. (default is ?)
4955
   *
4956
   * @return string US-ASCII string
4957
   */
4958
  public static function str_transliterate($str, $unknown = '?')
  {
    // Lookup table, filled on demand: $UTF8_TO_ASCII[bank][low byte] => replacement,
    // where "bank" is the code point shifted right by 8 bits.
    // NOTE(review): the variable name must not change - the bank files required
    // below run in this scope and presumably assign to it; confirm against data/x??.php.
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    // split the string into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Decode the code point from the lead byte and its continuation bytes.
      // $ord is reset for every character so that an undecodable sequence can
      // never inherit the code point of the previous character, and the byte
      // count is checked before any continuation byte is read.
      $ord = null;
      $byteCount = strlen($c);

      if ($ordC0 >= 192 && $ordC0 <= 223 && $byteCount >= 2) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239 && $byteCount >= 3) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247 && $byteCount >= 4) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251 && $byteCount >= 5) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253 && $byteCount >= 6) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // stray continuation bytes, truncated sequences and the lead bytes 254 / 255
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        // no bank file, or a bank file that did not define this bank:
        // remember an empty bank so the lookup below stays valid
        if (!isset($UTF8_TO_ASCII[$bank]) || !is_array($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference loop
    unset($c);

    return implode('', $chars);
  }
5050
5051
  /**
5052
   * Counts number of words in the UTF-8 string.
5053
   *
5054
   * @param string $str    The input string.
5055
   * @param int    $format <strong>0</strong> => return a number of words<br />
5056
   *                       <strong>1</strong> => return an array of words
5057
   *                       <strong>2</strong> => return an array of words with word-offset as key
5058
   * @param string $charlist
5059
   *
5060
   * @return array|float The number of words in the string
5061
   */
5062
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // Splits $str on "words" (runs of the character class built by self::rxClass(),
    // optionally joined by a dash or an apostrophe). With PREG_SPLIT_DELIM_CAPTURE
    // the result alternates: [gap, word, gap, word, ..., gap].
    $charlist = self::rxClass($charlist, '\pL');
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    if ($strParts === false) {
      // preg_split() failed (e.g. the subject is not valid UTF-8):
      // behave as if the string contained no word at all
      $strParts = array('');
    }

    $len = count($strParts);

    if ($format === 1) {
      // list of words: every odd index of the split result
      $words = array();
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      self::checkForSupport();

      // words keyed by their character offset within $str
      $words = array();
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $words[$offset] = $strParts[$i];
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

      return $words;
    }

    // number of words: n words produce 2n + 1 parts
    return (int)(($len - 1) / 2);
  }
5095
5096
  /**
5097
   * Case-insensitive string comparison.
5098
   *
5099
   * @param string $str1
5100
   * @param string $str2
5101
   *
5102
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
5103 11
   */
5104
  public static function strcasecmp($str1, $str2)
  {
    // case-fold both sides, then compare them with self::strcmp()
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5108 11
5109 2
  /**
5110
   * String comparison.
5111
   *
5112
   * @param string $str1
5113 10
   * @param string $str2
5114 10
   *
5115
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
5116
   *              <strong>> 0</strong> if str1 is greater than str2<br />
5117
   *              <strong>0</strong> if they are equal.
5118 10
   */
5119
  public static function strcmp($str1, $str2)
  {
    // identical after string conversion: equal, no normalization required
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // otherwise compare the NFD-normalized forms with the native function
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5126 1
5127 1
  /**
5128 1
   * Find length of initial segment not matching mask.
5129
   *
5130 10
   * @param string $str
5131
   * @param string $charList
5132
   * @param int    $offset
5133 10
   * @param int    $length
5134 1
   *
5135 1
   * @return int|null
5136
   */
5137 10
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // only cut the string when an offset or a non-default length was requested
    $useWholeString = !$offset && 2147483647 === $length;
    if ($useWholeString) {
      $str = (string)$str;
    } else {
      $str = (string)self::substr($str, $offset, $length);
    }

    // lazily capture everything in front of the first character from the list
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (!preg_match($pattern, $str, $match)) {
      // no character of the list occurs: the whole string counts
      return self::strlen($str);
    }

    /** @noinspection OffsetOperationsInspection */
    return self::strlen($match[1]);
  }
5156
5157
  /**
5158
   * Makes a UTF-8 string from code points.
5159
   *
5160
   * @param    array $array Integer or Hexadecimal codepoints
5161
   *
5162
   * @return   string UTF-8 encoded string
5163
   */
5164
  public static function string($array)
  {
    // map every code point through UTF8::chr() and concatenate the results
    $toChar = array('\\voku\\helper\\UTF8', 'chr');
    $characters = array_map($toChar, $array);

    return implode($characters);
  }
5176
5177
  /**
5178
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
5179
   *
5180
   * @param    string $str The input string.
5181
   *
5182
   * @return   bool True if the string has BOM at the start, False otherwise.
5183
   */
5184
  public static function string_has_bom($str)
  {
    // the keys of self::$bom are the BOM byte sequences; the values are not needed here
    $hasBom = false;

    foreach (self::$bom as $bomString => $bomByteLength) {
      if (strpos($str, $bomString) === 0) {
        $hasBom = true;
        break;
      }
    }

    return $hasBom;
  }
5194
5195
  /**
5196
   * Strip HTML and PHP tags from a string.
5197
   *
5198
   * @link http://php.net/manual/en/function.strip-tags.php
5199
   *
5200 4
   * @param string $str            <p>
5201
   *                               The input string.
5202 4
   *                               </p>
5203
   * @param string $allowable_tags [optional] <p>
5204
   *                               You can use the optional second parameter to specify tags which should
5205
   *                               not be stripped.
5206
   *                               </p>
5207
   *                               <p>
5208
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
5209
   *                               can not be changed with allowable_tags.
5210
   *                               </p>
5211
   *
5212
   * @return string the stripped string.
5213
   */
5214
  public static function strip_tags($str, $allowable_tags = null)
  {
    // pass the input through self::clean() before handing it to the native function
    $cleaned = self::clean($str);
    $stripped = strip_tags($cleaned, $allowable_tags);

    return $stripped;
  }
5221
5222
  /**
5223
   * Finds position of first occurrence of a string within another, case insensitive.
5224
   *
5225
   * @link http://php.net/manual/en/function.mb-stripos.php
5226
   *
5227
   * @param string  $haystack  <p>
5228
   *                           The string from which to get the position of the first occurrence
5229
   *                           of needle
5230
   *                           </p>
5231
   * @param string  $needle    <p>
5232
   *                           The string to find in haystack
5233 1
   *                           </p>
5234
   * @param int     $offset    [optional] <p>
5235 1
   *                           The position in haystack
5236
   *                           to start searching
5237 1
   *                           </p>
5238
   * @param string  $encoding
5239
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
5240
   *
5241
   * @return int Return the numeric position of the first occurrence of
5242
   * needle in the haystack
5243
   * string, or false if needle is not found.
5244
   */
5245
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Case-insensitive position lookup via mb_stripos().
    // Returns false when either string is empty or when the needle is not found.
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = (string)self::clean($haystack);
      $needle = (string)self::clean($needle);

      // cleaning may leave nothing behind; keep the "empty => false" rule from above
      if (!isset($haystack[0], $needle[0])) {
        return false;
      }
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalizeEncoding($encoding);
    }

    // the default offset is null: cast it so mb_stripos() always receives an integer
    return \mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
5271
5272
  /**
5273
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
5274
   *
5275
   * @param string $str
5276 10
   * @param string $needle
5277
   * @param bool   $before_needle
5278 10
   *
5279 10
   * @return false|string
5280
   */
5281 10
  public static function stristr($str, $needle, $before_needle = false)
  {
    // an empty needle can never be found
    $needle .= '';
    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    $found = \mb_stristr($str, $needle, $before_needle, 'UTF-8');

    return $found;
  }
5292 9
5293 9
  /**
5294
   * Get the string length, not the byte-length!
5295 9
   *
5296
   * @link     http://php.net/manual/en/function.mb-strlen.php
5297
   *
5298 1
   * @param string  $str       The string being checked for length.
5299 1
   * @param string  $encoding  Set the charset for e.g. "\mb_" function
5300 1
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
5301
   *
5302 9
   * @return int the number of characters in
5303 9
   *           string str having character encoding
5304
   *           encoding. A multi-byte character is
5305
   *           counted as 1.
5306
   */
5307
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    // INFO: booleans are only accepted as a fallback for old versions and mean "UTF-8"
    $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

    // for these encodings the native byte count is returned directly
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    self::checkForSupport();

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5336
5337
  /**
5338
   * Case insensitive string comparisons using a "natural order" algorithm.
5339 6
   *
5340
   * @param string $str1
5341 6
   * @param string $str2
5342
   *
5343
   * @return int <strong>< 0</strong> if str1 is less than str2<br />
5344
   *             <strong>> 0</strong> if str1 is greater than str2<br />
5345 6
   *             <strong>0</strong> if they are equal
5346
   */
5347
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both sides, then compare them with self::strnatcmp()
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5351
5352
  /**
5353
   * String comparisons using a "natural order" algorithm
5354
   *
5355
   * @link  http://php.net/manual/en/function.strnatcmp.php
5356
   *
5357
   * @param string $str1 <p>
5358
   *                     The first string.
5359
   *                     </p>
5360
   * @param string $str2 <p>
5361
   *                     The second string.
5362
   *                     </p>
5363
   *
5364
   * @return int Similar to other string comparison functions, this one returns &lt; 0 if
5365
   * str1 is less than str2; &gt;
5366 1
   * 0 if str1 is greater than
5367
   * str2, and 0 if they are equal.
5368 1
   * @since 4.0
5369
   * @since 5.0
5370 1
   */
5371
  public static function strnatcmp($str1, $str2)
  {
    // identical after string conversion: equal, nothing to fold
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // compare the self::strtonatfold() forms with the native natural-order comparison
    $natFolded1 = self::strtonatfold($str1);
    $natFolded2 = self::strtonatfold($str2);

    return strnatcmp($natFolded1, $natFolded2);
  }
5375
5376
  /**
5377
   * Binary safe case-insensitive string comparison of the first n characters
5378
   *
5379
   * @link  http://php.net/manual/en/function.strncasecmp.php
5380
   *
5381
   * @param string $str1 <p>
5382
   *                     The first string.
5383 10
   *                     </p>
5384
   * @param string $str2 <p>
5385 10
   *                     The second string.
5386 10
   *                     </p>
5387 10
   * @param int    $len  <p>
5388
   *                     The length of strings to be used in the comparison.
5389 10
   *                     </p>
5390 1
   *
5391 1
   * @return int &lt; 0 if <i>str1</i> is less than
5392 1
   * <i>str2</i>; &gt; 0 if <i>str1</i> is
5393
   * greater than <i>str2</i>, and 0 if they are equal.
5394 10
   * @since 4.0.4
5395
   * @since 5.0
5396 10
   */
5397
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both sides, then compare their first $len characters with self::strncmp()
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5401
5402
  /**
5403 10
   * Binary safe string comparison of the first n characters
5404 10
   *
5405
   * @link  http://php.net/manual/en/function.strncmp.php
5406 10
   *
5407
   * @param string $str1 <p>
5408 10
   *                     The first string.
5409
   *                     </p>
5410
   * @param string $str2 <p>
5411
   *                     The second string.
5412
   *                     </p>
5413
   * @param int    $len  <p>
5414
   *                     Number of characters to use in the comparison.
5415
   *                     </p>
5416
   *
5417
   * @return int &lt; 0 if <i>str1</i> is less than
5418
   * <i>str2</i>; &gt; 0 if <i>str1</i>
5419
   * is greater than <i>str2</i>, and 0 if they are
5420
   * equal.
5421
   * @since 4.0
5422
   * @since 5.0
5423
   */
5424 20
  public static function strncmp($str1, $str2, $len)
  {
    // Compares the first $len characters of both strings with self::strcmp().
    // self::substr() can return false instead of a string; cast the result so
    // that self::strcmp() always receives strings (false becomes '').
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5428 20
5429 5
  /**
5430
   * Search a string for any of a set of characters
5431
   *
5432
   * @link  http://php.net/manual/en/function.strpbrk.php
5433 18
   *
5434
   * @param string $haystack  <p>
5435 18
   *                          The string where char_list is looked for.
5436
   *                          </p>
5437
   * @param string $char_list <p>
5438
   *                          This parameter is case sensitive.
5439
   *                          </p>
5440
   *
5441
   * @return string a string starting from the character found, or false if it is
5442
   * not found.
5443
   * @since 5.0
5444
   */
5445 3
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // nothing to search in, or nothing to search for
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // find the first character of the haystack that belongs to the list
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // return the rest of the haystack, starting at the byte position of that character
    $bytePosition = strpos($haystack, $m[0]);

    return substr($haystack, $bytePosition);
  }
5460
5461
  /**
5462 16
   * Find position of first occurrence of string in a string.
5463
   *
5464 16
   * @link http://php.net/manual/en/function.mb-strpos.php
5465
   *
5466 16
   * @param string  $haystack     <p>
5467 4
   *                              The string being checked.
5468
   *                              </p>
5469
   * @param string  $needle       <p>
5470
   *                              The position counted from the beginning of haystack.
5471 15
   *                              </p>
5472
   * @param int     $offset       [optional] <p>
5473 15
   *                              The search offset. If it is not specified, 0 is used.
5474 15
   *                              </p>
5475
   * @param string  $encoding
5476
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string.
5477
   *
5478
   * @return int The numeric position of the first occurrence of needle in the haystack string.<br />
5479
   *             If needle is not found it returns false.
5480
   */
5481
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support an integer $needle, so a code point
    // has to be turned into its character *before* the string cast below:
    // once $needle is a string, "(int)$needle === $needle" can never be true.
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }
    $needle = (string)$needle;

    // nothing can be found in (or with) an empty string
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      // (a boolean in the $encoding slot is replaced by the default encoding)
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalizeEncoding($encoding);
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): this branch is guarded by the 'iconv' flag but calls
    // grapheme_strpos() -- confirm that this pairing is intended.
    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback: byte-wise search on the (optionally shortened) haystack,
    // then convert the byte position back into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5539
5540
  /**
   * Returns a portion of haystack, located via the last occurrence of needle.
   *
   * Thin wrapper around mb_strrchr() that normalizes the encoding name first.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string to search in.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to look for.
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Passed through to mb_strrchr(): true selects the part
   *                         of haystack before the last occurrence of needle,
   *                         false the part starting at that occurrence.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name; it is run through
   *                         normalizeEncoding() before use.
   *                         </p>
   *
   * @return string|false The selected portion of haystack,
   *                      or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    return \mb_strrchr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5575
5576 37
  /**
   * Reverses the order of the characters in a string.
   *
   * The string is broken up with self::split(), the resulting pieces are
   * reversed and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The string with its characters in reverse order
   */
  public static function strrev($str)
  {
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode($reversed);
  }
5587 1
5588
  /**
   * Returns a portion of haystack, located via the last case-insensitive occurrence of needle.
   *
   * Thin wrapper around mb_strrichr() that normalizes the encoding name first.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack <p>
   *                         The string to search in.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to look for.
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Passed through to mb_strrichr(): true selects the part
   *                         of haystack before the last occurrence of needle,
   *                         false the part starting at that occurrence.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name; it is run through
   *                         normalizeEncoding() before use.
   *                         </p>
   *
   * @return string|false The selected portion of haystack,
   *                      or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    return \mb_strrichr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5623 1
5624
  /**
   * Find the position of the last occurrence of a string, ignoring case.
   *
   * Both arguments are lower-cased with self::strtolower() and the search
   * itself is delegated to self::strrpos().
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int|false The position of the last match, or false if needle is not found
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $haystackLower = self::strtolower($haystack);
    $needleLower = self::strtolower($needle);

    return self::strrpos($haystackLower, $needleLower, $offset);
  }
5637
5638
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being searched for the last
   *                              occurrence of needle.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack.
   *                              A non-negative integer is treated as a code point
   *                              and converted with self::chr().
   *                              </p>
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters into
   *                              the string. Negative values will stop searching at an arbitrary point
   *                              prior to the end of the string.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle
   *                   in haystack, or false if needle is not found or either
   *                   string is empty.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // an integer needle is a code point: turn it into its character
    // before everything is handled as a string
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: shorten the haystack according to the offset, search
    // byte-wise and translate the byte position into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // only a positive offset shifts the result; a negative one merely
    // trimmed the end of the haystack above
    return max($offset, 0) + self::strlen(substr($haystack, 0, $bytePos));
  }
5713 1
5714
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    The string to examine
   * @param string $mask   The allowed characters; turned into a regex character class via self::rxClass()
   * @param int    $offset Start position; when non-zero the string is cut with self::substr() first
   * @param int    $length Maximum length to examine; 2147483647 means "no limit"
   *
   * @return int Length of the leading run of mask characters, 0 if there is none
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // only slice when the caller actually restricted the range
    $needsSlice = $offset || $length !== 2147483647;
    if ($needsSlice) {
      $str = self::substr($str, $offset, $length);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5733
5734
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Thin wrapper around grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      <p>
   *                              The input string. Must be valid UTF-8.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to look for. Must be valid UTF-8.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack before the
   *                              first occurrence of the needle is returned
   *                              (excluding the needle).
   *                              </p>
   *
   * @return string|false The portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5758 1
5759
  /**
   * Unicode transformation for case-less matching.
   *
   * Applies the replacements from self::$commonCaseFold, optionally the
   * "caseFolding_full" data set, then cleans and lower-cases the result.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str  The input string
   * @param bool   $full Whether the "caseFolding_full" replacements are applied as well
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup tables are built once and kept for later calls
    static $fullTable = null;
    static $commonSearch = null;
    static $commonReplace = null;

    if ($commonSearch === null) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($commonSearch, $commonReplace, $str);

    if ($full) {

      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      // index 0 is used as the search list, index 1 as the replacement list
      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullTable[0], $fullTable[1], $str);
    }

    return self::strtolower(self::clean($str));
  }
5796
5797
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      <p>
   *                         The string being lowercased.
   *                         </p>
   * @param string $encoding Character encoding name; it is run through normalizeEncoding() before use
   *
   * @return string str with all alphabetic characters converted to lowercase;
   *                an empty input yields an empty string.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to convert
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    return \mb_strtolower($str, self::normalizeEncoding($encoding));
  }
5824 6
5825 3
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks
   * (\p{Mn}) is then removed.
   *
   * @param string $s The input string
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5836
5837
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when mbstring is flagged as available; otherwise the
   * string is cleaned and converted through the table from self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding Character encoding name; only used on the mbstring path
   *
   * @return string str with all alphabetic characters converted to uppercase;
   *                an empty input yields an empty string.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // replacement lists for the fallback, built once on first use
    static $lowerChars = null;
    static $upperChars = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, self::normalizeEncoding($encoding));
    }

    // fallback
    if ($lowerChars === null) {
      $table = self::case_table();
      $lowerChars = array_keys($table);
      $upperChars = array_values($table);
    }

    return str_replace($lowerChars, $upperChars, self::clean($str));
  }
5882
5883
  /**
   * Translate characters or replace sub-strings.
   *
   * Called with two arguments, $from must be an array of
   * "search => replacement" pairs and is handed to PHP's strtr() unchanged.
   * Called with three arguments, $from and $to are treated as parallel lists:
   * strings are split with self::str_split() (presumably into single
   * characters -- confirm against str_split()), arrays are used as given.
   * The longer list is cut down to the length of the shorter one.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  <p>
   *                           The string being translated.
   *                           </p>
   * @param string|array $from <p>
   *                           The characters / sub-strings to replace, or a
   *                           "search => replacement" array if $to is omitted.
   *                           </p>
   * @param string|array $to   <p>
   *                           The replacements, matched by position with $from.
   *                           </p>
   *
   * @return string A copy of str in which every occurrence of an entry of
   *                from has been replaced by the corresponding entry of to.
   */
  public static function strtr($str, $from, $to = INF)
  {
    // INF is the "argument not given" marker
    if (INF !== $to) {

      // only strings need to be split; an array already is a list of
      // entries and must not be passed to str_split()
      if (!is_array($from)) {
        $from = self::str_split($from);
      }

      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // array_combine() needs both lists to have the same length
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    return strtr($str, $from);
  }
5924
5925
  /**
   * Return the width of a string.
   *
   * Thin wrapper around mb_strwidth() with the encoding fixed to UTF-8.
   *
   * @param string $s The input string
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5939
5940 20
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>
   *                           The string to take the part from.
   *                           </p>
   * @param int     $start     <p>
   *                           The first position used in str.
   *                           </p>
   * @param int     $length    [optional] <p>
   *                           The maximum length of the returned string;
   *                           null means "up to the end of str".
   *                           </p>
   * @param string  $encoding  Character encoding name; only used on the mbstring path,
   *                           a boolean is replaced by 'UTF-8'
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string|false The portion of str specified by the start and length
   *                      parameters; an empty string for empty input and
   *                      false if start lies beyond the end of str.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed for the range check
    // and as the default length
    $charCount = 0;
    if ($start || $length === null) {
      $charCount = (int)self::strlen($str);
    }

    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    $slice = array_slice($chars, $start, $length);

    return implode($slice);
  }
6017 20
6018
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The relevant part of main_str is cut out with self::substr(), str is cut
   * down to the same number of characters and both parts are compared with
   * self::strcmp() or self::strcasecmp().
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
   *                                    end of the string.
   * @param int     $length             The length of the comparison. The default value is the largest of the length of
   *                                    the str compared to the length of main_str less the offset.
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int The result of self::strcmp() / self::strcasecmp() on the two parts
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // self::substr() returns false when the offset lies beyond the end of
    // the string; normalise that to an empty string so that the helpers
    // below always receive strings
    $main_str = (string)self::substr($main_str, $offset, $length);
    $str = (string)self::substr($str, 0, self::strlen($main_str));

    if ($case_insensitivity === true) {
      return self::strcasecmp($main_str, $str);
    }

    return self::strcmp($main_str, $str);
  }
6038
6039
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack <p>
   *                         The string to search in
   *                         </p>
   * @param string $needle   <p>
   *                         The substring to search for
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The offset where to start counting
   *                         </p>
   * @param int    $length   [optional] <p>
   *                         The maximum length after the specified offset to search for the
   *                         substring. If omitted, the search runs to the end of haystack.
   *                         </p>
   *
   * @return int|false The number of occurrences, or false if haystack or
   *                   needle is empty or the offset / length combination
   *                   is not positive.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      if ($length === null) {
        // no length given: count from $offset up to the end of the haystack.
        // (Casting null to 0 here would cut the haystack down to nothing.)
        // Non-positive offsets are still rejected, as before.
        if ($offset <= 0) {
          return false;
        }
      } else {
        $length = (int)$length;

        if ($length + $offset <= 0) {
          return false;
        }
      }

      // self::substr() returns false for an offset beyond the end of the
      // string; treat that as an empty haystack
      $haystack = (string)self::substr($haystack, $offset, $length);
    }

    self::checkForSupport();

    return \mb_substr_count($haystack, $needle);
  }
6087
6088
  /**
   * Replace text within a portion of a string (counted in characters, not in bytes).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, every element is processed on its own and an array is returned;
   * scalar $replacement / $start / $length values are then applied to every element,
   * array values are matched up with the elements of $str by position.
   *
   * @param string|array   $str         The input string or an array of input strings.
   * @param string|array   $replacement The replacement string(s); for a string $str only the
   *                                    element with key 0 of an array is used.
   * @param int|array      $start       The character offset(s) where the replacement begins.
   * @param null|int|array $length      The number of characters to replace; null means "up to the end"
   *                                    for a string $str, while for an array $str a missing length
   *                                    is treated as 0 (nothing is removed, the replacement is inserted).
   *
   * @return array|string The resulting string, or an array of strings if $str is an array.
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement: one replacement per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one offset per input string, non-integer entries fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one length per input string
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            // non-integer entries fall back to the number of input strings
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call: process the strings one by one.
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // A single string can only take a single replacement.
    if (is_array($replacement)) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // Split both strings into arrays of UTF-8 characters ("s": the dot also matches line breaks).
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      self::checkForSupport();

      $length = \mb_strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // implode() takes the separator first; the legacy (array, separator) order is rejected by PHP 8.
    return implode('', $smatches[0]);
  }
6165
6166
  /**
   * Swaps the case of every non-whitespace character: characters that are
   * already upper-case become lower-case, all others become upper-case.
   *
   * The encoding is passed through self::normalizeEncoding() and the string
   * through self::clean() before the characters are processed.
   *
   * @param string $str      The input string.
   * @param string $encoding The encoding handed to the case-conversion helpers.
   *
   * @return string The string with each character's case swapped ('' for empty input).
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $encoding = self::normalizeEncoding($encoding);
    $str = self::clean($str);

    // A character that equals its own upper-case form is lowered, anything else is raised.
    $flipCase = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $flipCase, $str);
  }
6201
6202
  /**
   * Camel-case alias for "UTF8::to_ascii()".
   *
   * @param string $s         The input string, e.g. a UTF-8 string.
   * @param string $subst_chr Forwarded unchanged to "UTF8::to_ascii()".
   *
   * @return string The result of "UTF8::to_ascii()".
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
6214
6215
  /**
   * Camel-case alias for "UTF8::to_latin1()".
   *
   * @param string|array $str The value forwarded to "UTF8::to_latin1()".
   *
   * @return string|array The result of "UTF8::to_latin1()".
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
6226
6227
  /**
   * Camel-case alias for "UTF8::to_utf8()".
   *
   * @param string|array $str The value forwarded to "UTF8::to_utf8()".
   *
   * @return string|array The result of "UTF8::to_utf8()".
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
6238
6239
  /**
   * Convert a string to ASCII by transliterating every multi-byte character.
   *
   * For each multi-byte character the following sources are tried in order:
   * iconv transliteration, the "translit_extra" data table, the first byte of the
   * character's NFD decomposition, and finally "UTF8::str_transliterate()".
   * Strings that contain only 7-bit characters are returned as they are
   * (after self::clean()).
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr The character used when no ASCII replacement could be found.
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    // lazily loaded lookup table, shared between calls
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // nothing to do if there is no byte >= 0x80 in the string
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = \Normalizer::normalize($s, \Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // "s" modifier: the dot must match line breaks too, otherwise they get lost when the
    // characters are joined again below
    preg_match_all('/./us', $s, $matches);
    $chars = $matches[0];

    foreach ($chars as &$c) {

      // single-byte characters are ASCII already
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if ($t !== false && is_string($t)) {
          if (!isset($t[0])) {
            $t = '?';
          } elseif (isset($t[1])) {
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // a failed iconv() call must not drop the character: let the fallbacks below handle it
      if ($t === false) {
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = \Normalizer::normalize($c, \Normalizer::NFD);

          // keep the base character of the decomposition if it is ASCII
          if (is_string($t) && isset($t[0]) && $t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference, so that a later write to $c cannot alter the last array element
    unset($c);

    return implode('', $chars);
  }
6318
6319
  /**
   * Alias for "UTF8::to_win1252()".
   *
   * @param string|array $str The value forwarded to "UTF8::to_win1252()".
   *
   * @return array|string The result of "UTF8::to_win1252()".
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6330
6331
  /**
   * Alias for "UTF8::to_win1252()".
   *
   * @param string|array $str The value forwarded to "UTF8::to_win1252()".
   *
   * @return string|array The result of "UTF8::to_win1252()".
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6342
6343
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - A byte sequence that already looks like valid UTF-8 is never converted. A lead byte
   *   0xC0-0xDF followed by one byte 0x80-0xBF, a lead byte 0xE0-0xEF followed by two such
   *   bytes, or a lead byte 0xF0-0xF7 followed by three such bytes is copied unchanged,
   *   even if it was meant as WINDOWS-1252 / ISO-8859-1 text.
   *
   *   For example:   %ABREPRESENT%C9%BB
   *   The 0xAB byte will be converted, but 0xC9 followed by 0xBB is also a valid UTF-8
   *   sequence and will be left unchanged.
   *
   * - After the byte-wise conversion, "\uXXXX" escape sequences and numeric HTML entities
   *   with two to four digits ("&#252;") are decoded as well.
   *
   * @param string|array $str Any string or array (arrays are converted element by element).
   *
   * @return string|array The same string (or array), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    // Encodes one ISO-8859-1 byte as a two-byte UTF-8 sequence.
    // Integer shifts are used on purpose: chr() must not be fed a fractional float.
    $latin1ToUtf8 = function ($byte) {
      $ord = ord($byte);

      return chr(0xc0 | ($ord >> 6)) . chr(0x80 | ($ord & 0x3f));
    };

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // number of continuation bytes announced by the lead byte (0 = not a UTF8 lead byte)
        if ($c1 <= "\xdf") {
          $need = 1;
        } elseif ($c1 <= "\xef") {
          $need = 2;
        } elseif ($c1 <= "\xf7") {
          $need = 3;
        } else {
          $need = 0;
        }

        // all announced continuation bytes must exist and lie in 0x80-0xBF
        $isUtf8 = $need > 0 && $i + $need < $max;
        for ($n = 1; $isUtf8 && $n <= $need; $n++) {
          $cn = $str[$i + $n];
          $isUtf8 = $cn >= "\x80" && $cn <= "\xbf";
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= substr($str, $i, $need + 1);
          $i += $need;
        } else { // not valid UTF8 - convert it
          $buf .= $latin1ToUtf8($c1);
        }

      } elseif (($c1 & "\xc0") === "\x80") { // stray byte 0x80-0xBF: needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= $latin1ToUtf8($c1);
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6470
6471 6
  /**
   * Convert a string (or every element of an array, recursively) into "win1252"-encoding
   * by handing it to "UTF8::utf8_decode()".
   *
   * @param  string|array $str The string, or an array of values, to convert.
   *
   * @return string|array The converted string ('' for empty input) or the converted array.
   */
  protected static function to_win1252($str)
  {
    if (!is_array($str)) {
      $str = (string)$str;

      return isset($str[0]) ? self::utf8_decode($str) : '';
    }

    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      $str[$key] = self::to_win1252($value);
    }

    return $str;
  }
6498
6499
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the original function if the string / chars were 7-bit only,
   * but the check for ASCII (7-bit) would cost more time than it saves here.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; if omitted (or empty), all
   *                         characters of the Unicode categories "Z" (separators) and
   *                         "C" (control / other) are stripped.
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // The string '0' is a legitimate character list, although it is "falsy" in PHP.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6527
6528
  /**
   * Makes the first character of a string upper-case and leaves the rest untouched.
   *
   * @param    string $str The input string
   *
   * @return   string The upper-cased first character followed by the remainder of the string
   */
  public static function ucfirst($str)
  {
    $head = self::strtoupper(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
6539 1
6540
  /**
   * Alias for "UTF8::ucfirst()".
   *
   * @param string $str The input string.
   *
   * @return string The result of "UTF8::ucfirst()".
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
6551 1
6552
  /**
   * Uppercase the first character of all words in the string.
   *
   * Words are the parts of the string separated by a single space (" "). Words that are
   * listed in $exceptions (strict, case-sensitive comparison) are left untouched; the first
   * character of the whole result is upper-cased in any case.
   *
   * @param  string $str        The input string.
   * @param  array  $exceptions Words that must not be changed.
   *
   * @return string The converted string ('' for empty input).
   */
  public static function ucwords($str, $exceptions = array())
  {
    // Same empty-check as the other methods of this class: a plain "!$str" would
    // also swallow the non-empty string '0'.
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;

    $words = explode(' ', $str);

    foreach ($words as &$word) {
      if (!$useExceptions || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }
    }
    // break the reference left behind by the by-reference loop
    unset($word);

    return self::ucfirst(implode(' ', $words));
  }
6593
6594 4
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Steps: url-decode, rewrite "%uXXXX" sequences as hexadecimal HTML entities, convert to
   * UTF-8, decode HTML entities, raw-url-decode once more and finally run the result
   * through "UTF8::fix_simple_utf8()".
   *
   * @param string $str The (possibly multiply) encoded input string.
   *
   * @return string The decoded string ('' for empty input).
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $decoded = urldecode($str);
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $decoded);

    // ENT_HTML5 is only referenced when the runtime check for PHP 5.4 succeeds
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    $utf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($utf8, $flags);
    $fixed = self::fix_simple_utf8(rawurldecode($withoutEntities));

    return (string)$fixed;
  }
6634
6635
  /**
6636
   * Returns an array that maps url-encoded Windows-1252 sequences ("%XX") to their UTF-8 characters.
6637
   *
6638
   * @return mixed
6639
   */
6640
  public static function urldecode_fix_win1252_chars()
6641
  {
6642
    static $array = array(
6643
        '%20' => ' ',
6644
        '%21' => '!',
6645
        '%22' => '"',
6646
        '%23' => '#',
6647
        '%24' => '$',
6648
        '%25' => '%',
6649
        '%26' => '&',
6650
        '%27' => "'",
6651
        '%28' => '(',
6652
        '%29' => ')',
6653
        '%2A' => '*',
6654
        '%2B' => '+',
6655
        '%2C' => ',',
6656
        '%2D' => '-',
6657
        '%2E' => '.',
6658
        '%2F' => '/',
6659
        '%30' => '0',
6660
        '%31' => '1',
6661
        '%32' => '2',
6662
        '%33' => '3',
6663
        '%34' => '4',
6664
        '%35' => '5',
6665
        '%36' => '6',
6666
        '%37' => '7',
6667
        '%38' => '8',
6668
        '%39' => '9',
6669
        '%3A' => ':',
6670
        '%3B' => ';',
6671
        '%3C' => '<',
6672
        '%3D' => '=',
6673
        '%3E' => '>',
6674
        '%3F' => '?',
6675
        '%40' => '@',
6676
        '%41' => 'A',
6677
        '%42' => 'B',
6678
        '%43' => 'C',
6679
        '%44' => 'D',
6680
        '%45' => 'E',
6681
        '%46' => 'F',
6682
        '%47' => 'G',
6683
        '%48' => 'H',
6684
        '%49' => 'I',
6685
        '%4A' => 'J',
6686
        '%4B' => 'K',
6687
        '%4C' => 'L',
6688
        '%4D' => 'M',
6689
        '%4E' => 'N',
6690
        '%4F' => 'O',
6691
        '%50' => 'P',
6692
        '%51' => 'Q',
6693
        '%52' => 'R',
6694
        '%53' => 'S',
6695
        '%54' => 'T',
6696
        '%55' => 'U',
6697
        '%56' => 'V',
6698
        '%57' => 'W',
6699
        '%58' => 'X',
6700
        '%59' => 'Y',
6701
        '%5A' => 'Z',
6702
        '%5B' => '[',
6703
        '%5C' => '\\',
6704
        '%5D' => ']',
6705
        '%5E' => '^',
6706
        '%5F' => '_',
6707
        '%60' => '`',
6708
        '%61' => 'a',
6709
        '%62' => 'b',
6710
        '%63' => 'c',
6711
        '%64' => 'd',
6712
        '%65' => 'e',
6713
        '%66' => 'f',
6714
        '%67' => 'g',
6715
        '%68' => 'h',
6716
        '%69' => 'i',
6717
        '%6A' => 'j',
6718
        '%6B' => 'k',
6719
        '%6C' => 'l',
6720
        '%6D' => 'm',
6721
        '%6E' => 'n',
6722
        '%6F' => 'o',
6723
        '%70' => 'p',
6724
        '%71' => 'q',
6725
        '%72' => 'r',
6726
        '%73' => 's',
6727
        '%74' => 't',
6728
        '%75' => 'u',
6729
        '%76' => 'v',
6730
        '%77' => 'w',
6731
        '%78' => 'x',
6732
        '%79' => 'y',
6733
        '%7A' => 'z',
6734
        '%7B' => '{',
6735
        '%7C' => '|',
6736
        '%7D' => '}',
6737
        '%7E' => '~',
6738
        '%7F' => '',
6739
        '%80' => '`',
6740
        '%81' => '',
6741
        '%82' => '‚',
6742
        '%83' => 'ƒ',
6743
        '%84' => '„',
6744
        '%85' => '…',
6745
        '%86' => '†',
6746
        '%87' => '‡',
6747
        '%88' => 'ˆ',
6748
        '%89' => '‰',
6749
        '%8A' => 'Š',
6750
        '%8B' => '‹',
6751
        '%8C' => 'Œ',
6752
        '%8D' => '',
6753
        '%8E' => 'Ž',
6754
        '%8F' => '',
6755
        '%90' => '',
6756
        '%91' => '‘',
6757
        '%92' => '’',
6758
        '%93' => '“',
6759
        '%94' => '”',
6760
        '%95' => '•',
6761
        '%96' => '–',
6762
        '%97' => '—',
6763
        '%98' => '˜',
6764
        '%99' => '™',
6765
        '%9A' => 'š',
6766
        '%9B' => '›',
6767
        '%9C' => 'œ',
6768
        '%9D' => '',
6769
        '%9E' => 'ž',
6770
        '%9F' => 'Ÿ',
6771
        '%A0' => '',
6772
        '%A1' => '¡',
6773
        '%A2' => '¢',
6774
        '%A3' => '£',
6775
        '%A4' => '¤',
6776
        '%A5' => '¥',
6777
        '%A6' => '¦',
6778
        '%A7' => '§',
6779
        '%A8' => '¨',
6780
        '%A9' => '©',
6781
        '%AA' => 'ª',
6782
        '%AB' => '«',
6783
        '%AC' => '¬',
6784
        '%AD' => '',
6785
        '%AE' => '®',
6786
        '%AF' => '¯',
6787
        '%B0' => '°',
6788
        '%B1' => '±',
6789
        '%B2' => '²',
6790
        '%B3' => '³',
6791
        '%B4' => '´',
6792
        '%B5' => 'µ',
6793
        '%B6' => '¶',
6794
        '%B7' => '·',
6795
        '%B8' => '¸',
6796
        '%B9' => '¹',
6797
        '%BA' => 'º',
6798
        '%BB' => '»',
6799
        '%BC' => '¼',
6800
        '%BD' => '½',
6801
        '%BE' => '¾',
6802
        '%BF' => '¿',
6803
        '%C0' => 'À',
6804
        '%C1' => 'Á',
6805
        '%C2' => 'Â',
6806
        '%C3' => 'Ã',
6807
        '%C4' => 'Ä',
6808
        '%C5' => 'Å',
6809
        '%C6' => 'Æ',
6810
        '%C7' => 'Ç',
6811
        '%C8' => 'È',
6812
        '%C9' => 'É',
6813
        '%CA' => 'Ê',
6814
        '%CB' => 'Ë',
6815
        '%CC' => 'Ì',
6816
        '%CD' => 'Í',
6817
        '%CE' => 'Î',
6818
        '%CF' => 'Ï',
6819
        '%D0' => 'Ð',
6820
        '%D1' => 'Ñ',
6821
        '%D2' => 'Ò',
6822
        '%D3' => 'Ó',
6823
        '%D4' => 'Ô',
6824
        '%D5' => 'Õ',
6825
        '%D6' => 'Ö',
6826
        '%D7' => '×',
6827
        '%D8' => 'Ø',
6828
        '%D9' => 'Ù',
6829
        '%DA' => 'Ú',
6830
        '%DB' => 'Û',
6831
        '%DC' => 'Ü',
6832
        '%DD' => 'Ý',
6833
        '%DE' => 'Þ',
6834
        '%DF' => 'ß',
6835
        '%E0' => 'à',
6836
        '%E1' => 'á',
6837
        '%E2' => 'â',
6838
        '%E3' => 'ã',
6839
        '%E4' => 'ä',
6840
        '%E5' => 'å',
6841
        '%E6' => 'æ',
6842
        '%E7' => 'ç',
6843
        '%E8' => 'è',
6844
        '%E9' => 'é',
6845
        '%EA' => 'ê',
6846
        '%EB' => 'ë',
6847
        '%EC' => 'ì',
6848
        '%ED' => 'í',
6849
        '%EE' => 'î',
6850
        '%EF' => 'ï',
6851
        '%F0' => 'ð',
6852
        '%F1' => 'ñ',
6853
        '%F2' => 'ò',
6854
        '%F3' => 'ó',
6855
        '%F4' => 'ô',
6856
        '%F5' => 'õ',
6857
        '%F6' => 'ö',
6858
        '%F7' => '÷',
6859
        '%F8' => 'ø',
6860
        '%F9' => 'ù',
6861
        '%FA' => 'ú',
6862
        '%FB' => 'û',
6863
        '%FC' => 'ü',
6864
        '%FD' => 'ý',
6865
        '%FE' => 'þ',
6866
        '%FF' => 'ÿ',
6867
    );
6868
6869
    return $array;
6870
  }
6871
6872
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through "UTF8::to_utf8()", then the sequences listed in
   * self::$utf8ToWin1252 are replaced, and the result is handed to Xml::utf8_decode().
   *
   * @param string $str The UTF-8 input string.
   *
   * @return string The decoded string ('' for empty input).
   */
  public static function utf8_decode($str)
  {
    // search / replace lists, built once from self::$utf8ToWin1252 and reused afterwards
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    $str = self::to_utf8($str);

    if (null === $search) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $str);

    return Xml::utf8_decode($mapped);
  }
6902
6903
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * If the encoded result contains the byte 0xC2, the sequences listed in
   * self::$cp1252ToUtf8 are replaced afterwards.
   *
   * @param string $str The ISO-8859-1 input string.
   *
   * @return string The UTF-8 encoded string.
   */
  public static function utf8_encode($str)
  {
    // search / replace lists, built once from self::$cp1252ToUtf8 and reused afterwards
    static $from = null;
    static $to = null;

    $encoded = \utf8_encode($str);

    // without a 0xC2 byte none of the replacements is attempted
    if (false === strpos($encoded, "\xC2")) {
      return $encoded;
    }

    if (null === $from) {
      $from = array_keys(self::$cp1252ToUtf8);
      $to = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($from, $to, $encoded);
  }
6929
6930
  /**
   * fix -> utf8-win1252 chars
   *
   * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F), use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str The string to fix.
   *
   * @return  string The result of "UTF8::fix_simple_utf8()".
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6947
6948
  /**
   * Returns the table of utf8 whitespace characters stored in self::$whitespaceTable.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6962
6963
  /**
   * Limit the number of words in a string.
   *
   * The string is cut after at most $words whitespace-separated words; if something was
   * cut off, trailing whitespace is removed and $strAddOn is appended.
   *
   * @param  string $str      The input string.
   * @param  int    $words    The maximum number of words to keep.
   * @param  string $strAddOn The text appended to a shortened string.
   *
   * @return string The (possibly shortened) string; '' for empty input or if $words is less than 1.
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $limit = (int)$words;

    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1..$limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $str, $found);

    // only a match that is shorter than the whole string means that something was cut off
    if (isset($found[0]) && self::strlen($str) !== self::strlen($found[0])) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $str;
  }
6998
6999
  /**
   * Wraps a string to a given number of characters.
   *
   * The wrapping itself is done by PHP's native wordwrap() on a single-byte "mask" of the
   * string (one mask byte per character), so the width is counted in characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>
   *                      The input string.
   *                      </p>
   * @param int    $width [optional] <p>
   *                      The column width.
   *                      </p>
   * @param string $break [optional] <p>
   *                      The line is broken using the optional
   *                      break parameter.
   *                      </p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string the given string wrapped at the specified column
   *                ('' if the string or the break is empty).
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ('' === $str || '' === $break) {
      return '';
    }

    $segments = explode($break, $str);

    if (1 === count($segments) && '' === $segments[0]) {
      return '';
    }

    // Build the list of characters and, in parallel, the mask:
    // "#" for an existing break, " " for a space and "?" for any other character.
    $mask = '';
    $chars = array();
    foreach ($segments as $index => $segment) {

      if ($index) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    // Every "#" in the wrapped mask marks a line break.
    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {

      // copy the characters in front of the break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // the break or space sitting at the wrap position is replaced by the break below
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
7080
7081
  /**
   * Returns the array of Unicode White Space characters stored in self::$whitespace.
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7090
7091
}
7092