Completed
Push — master ( d5d534...316bdb )
by Lars
04:11
created

UTF8::toUTF8()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 3
Bugs 0 Features 1
Metric Value
c 3
b 0
f 1
dl 0
loc 4
ccs 3
cts 3
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 1
crap 1
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
93
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
94
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
96
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  private static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    //HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  private static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  private static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  private static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
232
   * @var array
233
   */
234
  private static $brokenUtf8ToUtf8 = array(
235
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
236
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
237
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
238
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
239
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
240
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
241
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
242
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
243
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
244
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
245
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
246
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
247
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
248
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
249
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
250
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
251
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
252
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
253
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
254
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
255
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
256
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
257
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
258
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
259
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
260
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
261
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
262
      'ü'       => 'ü',
263
      'ä'       => 'ä',
264
      'ö'       => 'ö',
265
      'Ö'       => 'Ö',
266
      'ß'       => 'ß',
267
      'Ã '       => 'à',
268
      'á'       => 'á',
269
      'â'       => 'â',
270
      'ã'       => 'ã',
271
      'ù'       => 'ù',
272
      'ú'       => 'ú',
273
      'û'       => 'û',
274
      'Ù'       => 'Ù',
275
      'Ú'       => 'Ú',
276
      'Û'       => 'Û',
277
      'Ü'       => 'Ü',
278
      'ò'       => 'ò',
279
      'ó'       => 'ó',
280
      'ô'       => 'ô',
281
      'è'       => 'è',
282
      'é'       => 'é',
283
      'ê'       => 'ê',
284
      'ë'       => 'ë',
285
      'À'       => 'À',
286
      'Á'       => 'Á',
287
      'Â'       => 'Â',
288
      'Ã'       => 'Ã',
289
      'Ä'       => 'Ä',
290
      'Ã…'       => 'Å',
291
      'Ç'       => 'Ç',
292
      'È'       => 'È',
293
      'É'       => 'É',
294
      'Ê'       => 'Ê',
295
      'Ë'       => 'Ë',
296
      'ÃŒ'       => 'Ì',
297
      'Í'       => 'Í',
298
      'ÃŽ'       => 'Î',
299
      'Ï'       => 'Ï',
300
      'Ñ'       => 'Ñ',
301
      'Ã’'       => 'Ò',
302
      'Ó'       => 'Ó',
303
      'Ô'       => 'Ô',
304
      'Õ'       => 'Õ',
305
      'Ø'       => 'Ø',
306
      'Ã¥'       => 'å',
307
      'æ'       => 'æ',
308
      'ç'       => 'ç',
309
      'ì'       => 'ì',
310
      'í'       => 'í',
311
      'î'       => 'î',
312
      'ï'       => 'ï',
313
      'ð'       => 'ð',
314
      'ñ'       => 'ñ',
315
      'õ'       => 'õ',
316
      'ø'       => 'ø',
317
      'ý'       => 'ý',
318
      'ÿ'       => 'ÿ',
319
      '€'      => '€',
320
  );
321
322
  /**
323
   * @var array
324
   */
325
  private static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  private static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  private static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792
      'ISO-IR-230',
793
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803
804
  /**
   * Constructor.
   *
   * Creating an instance does nothing except run the class-level support
   * check via self::checkForSupport().
   */
  public function __construct()
  {
    // the only work done on instantiation: delegate to the static support check
    self::checkForSupport();
  }
811
812
  /**
   * Fetch one character of a string by position ($str[1] like functionality).
   *
   * Delegates to self::substr() with a fixed length of one character.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of the character to return.
   *
   * @return   string Single Multi-Byte character.
   */
  public static function access($str, $pos)
  {
    // always exactly one character, starting at the requested position
    $length = 1;

    return self::substr($str, $pos, $length);
  }
824
825
  /**
   * Make sure the given string starts with the UTF-8 BOM.
   *
   * INFO: If self::string_has_bom() does not report false for the input,
   *       the input string is returned unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The input string, prefixed with self::bom() when no BOM was reported
   */
  public static function add_bom_to_string($str)
  {
    // anything other than a strict "false" counts as "BOM already present"
    $bomReported = self::string_has_bom($str) !== false;
    if ($bomReported) {
      return $str;
    }

    return self::bom() . $str;
  }
842
843
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read most-significant-bit first, eight per output byte. An
   * input whose length is not a multiple of eight is left-padded with zeros,
   * so "1100001" and "01100001" both decode to "a". Characters other than
   * "0" and "1" are ignored.
   *
   * The conversion works byte by byte instead of going through
   * base_convert(), which loses precision on long inputs and drops leading
   * zeros (giving pack('H*', ...) an odd number of hex digits and therefore a
   * wrong last byte).
   *
   * @param mixed $bin String (or integer) made of the digits 1|0.
   *
   * @return string The decoded bytes; an empty string when the input holds no binary digit.
   */
  public static function binary_to_str($bin)
  {
    // keep only real binary digits (base_convert() silently skipped everything else, too)
    $bits = preg_replace('/[^01]/', '', (string)$bin);
    if ($bits === null || $bits === '') {
      return '';
    }

    // left-pad to whole bytes so that the value of the number is preserved
    $missing = (8 - (strlen($bits) % 8)) % 8;
    if ($missing > 0) {
      $bits = str_repeat('0', $missing) . $bits;
    }

    // decode eight bits at a time: no float round-trip, no nibble misalignment
    $str = '';
    foreach (str_split($bits, 8) as $byte) {
      $str .= chr(bindec($byte));
    }

    return $str;
  }
854
855
  /**
   * Get the UTF-8 Byte Order Mark.
   *
   * @return string The three bytes 0xEF 0xBB 0xBF
   */
  public static function bom()
  {
    $utf8Bom = "\xEF\xBB\xBF";

    return $utf8Bom;
  }
864
865
  /**
   * Alias of UTF8::chr_map(): both arguments are forwarded unchanged and the
   * result of chr_map() is returned as-is.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
878
879
  /**
880
   * Returns an array of all lower and upper case UTF-8 encoded characters.
881
   *
882
   * @return   array An array with lower case chars as keys and upper chars as values.
883
   */
884
  private static function case_table()
885
  {
886
    static $case = array(
887
888
      // lower => upper
889
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
890
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
891
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
892
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
893
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
894
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
895
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
896
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
897
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
898
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
899
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
900
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
901
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
902
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
903
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
904
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
905
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
906
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
907
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
908
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
909
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
910
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
911
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
912
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
913
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
914
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
915
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
916
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
917
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
918
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
919
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
920
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
921
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
922
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
923
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
924
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
925
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
926
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
927
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
928
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
929
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
930
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
931
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
932
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
933
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
934
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
935
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
936
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
937
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
938
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
939
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
940
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
941
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
942
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
943
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
944
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
945
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
946
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
947
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
948
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
949
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
950
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
951
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
952
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
953
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
954
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
955
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
956
      "\xea\x9e\x87"     => "\xea\x9e\x86",
957
      "\xea\x9e\x85"     => "\xea\x9e\x84",
958
      "\xea\x9e\x83"     => "\xea\x9e\x82",
959
      "\xea\x9e\x81"     => "\xea\x9e\x80",
960
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
961
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
962
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
963
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
964
      "\xea\x9d\xad"     => "\xea\x9d\xac",
965
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
966
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
967
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
968
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
969
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
970
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
971
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
972
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
973
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
974
      "\xea\x9d\x99"     => "\xea\x9d\x98",
975
      "\xea\x9d\x97"     => "\xea\x9d\x96",
976
      "\xea\x9d\x95"     => "\xea\x9d\x94",
977
      "\xea\x9d\x93"     => "\xea\x9d\x92",
978
      "\xea\x9d\x91"     => "\xea\x9d\x90",
979
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
980
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
981
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
982
      "\xea\x9d\x89"     => "\xea\x9d\x88",
983
      "\xea\x9d\x87"     => "\xea\x9d\x86",
984
      "\xea\x9d\x85"     => "\xea\x9d\x84",
985
      "\xea\x9d\x83"     => "\xea\x9d\x82",
986
      "\xea\x9d\x81"     => "\xea\x9d\x80",
987
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
988
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
989
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
990
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
991
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
992
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
993
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
994
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
995
      "\xea\x9c\xad"     => "\xea\x9c\xac",
996
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
997
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
998
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
999
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
1000
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
1001
      "\xea\x9a\x97"     => "\xea\x9a\x96",
1002
      "\xea\x9a\x95"     => "\xea\x9a\x94",
1003
      "\xea\x9a\x93"     => "\xea\x9a\x92",
1004
      "\xea\x9a\x91"     => "\xea\x9a\x90",
1005
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
1006
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
1007
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
1008
      "\xea\x9a\x89"     => "\xea\x9a\x88",
1009
      "\xea\x9a\x87"     => "\xea\x9a\x86",
1010
      "\xea\x9a\x85"     => "\xea\x9a\x84",
1011
      "\xea\x9a\x83"     => "\xea\x9a\x82",
1012
      "\xea\x9a\x81"     => "\xea\x9a\x80",
1013
      "\xea\x99\xad"     => "\xea\x99\xac",
1014
      "\xea\x99\xab"     => "\xea\x99\xaa",
1015
      "\xea\x99\xa9"     => "\xea\x99\xa8",
1016
      "\xea\x99\xa7"     => "\xea\x99\xa6",
1017
      "\xea\x99\xa5"     => "\xea\x99\xa4",
1018
      "\xea\x99\xa3"     => "\xea\x99\xa2",
1019
      "\xea\x99\x9f"     => "\xea\x99\x9e",
1020
      "\xea\x99\x9d"     => "\xea\x99\x9c",
1021
      "\xea\x99\x9b"     => "\xea\x99\x9a",
1022
      "\xea\x99\x99"     => "\xea\x99\x98",
1023
      "\xea\x99\x97"     => "\xea\x99\x96",
1024
      "\xea\x99\x95"     => "\xea\x99\x94",
1025
      "\xea\x99\x93"     => "\xea\x99\x92",
1026
      "\xea\x99\x91"     => "\xea\x99\x90",
1027
      "\xea\x99\x8f"     => "\xea\x99\x8e",
1028
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1029
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1030
      "\xea\x99\x89"     => "\xea\x99\x88",
1031
      "\xea\x99\x87"     => "\xea\x99\x86",
1032
      "\xea\x99\x85"     => "\xea\x99\x84",
1033
      "\xea\x99\x83"     => "\xea\x99\x82",
1034
      "\xea\x99\x81"     => "\xea\x99\x80",
1035
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1036
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1037
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1038
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1039
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1040
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1041
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1042
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1043
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1044
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1045
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1046
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1047
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1048
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1049
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1050
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1051
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1052
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1053
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1054
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1055
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1056
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1057
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1058
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1059
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1060
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1061
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1062
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1063
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1064
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1065
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1066
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1067
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1068
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1069
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1070
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1071
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1072
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1073
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1074
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1075
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1076
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1077
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1078
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1079
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1080
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1081
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1082
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1083
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1084
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1085
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1086
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1087
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1088
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1089
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1090
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1091
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1092
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1093
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1094
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1095
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1096
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1097
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1098
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1099
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1100
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1101
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1102
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1103
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1104
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1105
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1106
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1107
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1108
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1109
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1110
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1111
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1112
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1113
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1114
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1115
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1116
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1117
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1118
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1119
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1120
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1121
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1122
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1123
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1124
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1125
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1126
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1127
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1128
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1129
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1130
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1131
      "\xe2\xb1\xa5"     => "\xc8\xba",
1132
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1133
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1134
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1135
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1136
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1137
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1138
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1139
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1140
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1141
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1142
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1143
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1144
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1145
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1146
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1147
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1148
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1149
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1150
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1151
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1152
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1153
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1154
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1155
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1156
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1157
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1158
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1159
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1160
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1161
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1162
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1163
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1164
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1165
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1166
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1167
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1168
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1169
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1170
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1171
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1172
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1173
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1174
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1175
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1176
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1177
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1178
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1179
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1180
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1181
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1182
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1183
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1184
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1185
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1186
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1187
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1188
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1189
      "\xe1\xbe\xbe"     => "\xce\x99",
1190
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1191
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1192
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1193
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1194
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1195
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1196
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1197
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1198
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1199
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1200
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1201
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1202
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1203
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1204
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1205
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1206
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1207
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1208
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1209
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1210
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1211
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1212
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1213
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1214
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1215
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1216
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1217
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1218
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1219
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1220
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1221
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1222
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1223
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1224
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1225
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1226
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1227
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1228
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1229
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1230
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1231
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1232
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1233
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1234
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1235
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1236
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1237
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1238
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1239
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1240
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1241
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1242
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1243
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1244
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1245
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1246
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1247
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1248
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1249
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1250
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1251
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1252
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1253
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1254
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1255
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1256
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1257
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1258
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1259
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1260
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1261
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1262
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1263
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1264
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1265
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1266
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1267
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1268
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1269
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1270
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1271
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1272
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1273
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1274
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1275
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1276
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1277
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1278
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1279
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1280
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1281
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1282
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1283
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1284
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1285
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1286
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1287
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1288
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1289
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1290
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1291
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1292
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1293
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1294
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1295
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1296
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1297
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1298
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1299
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1300
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1301
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1302
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1303
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1304
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1305
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1306
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1307
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1308
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1309
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1310
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1311
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1312
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1313
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1314
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1315
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1316
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1317
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1318
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1319
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1320
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1321
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1322
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1323
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1324
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1325
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1326
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1327
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1328
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1329
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1330
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1331
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1332
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1333
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1334
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1335
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1336
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1337
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1338
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1339
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1340
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1341
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1342
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1343
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1344
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1345
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1346
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1347
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1348
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1349
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1350
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1351
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1352
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1353
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1354
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1355
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1356
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1357
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1358
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1359
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1360
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1361
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1362
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1363
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1364
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1365
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1366
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1367
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1368
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1369
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1370
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1371
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1372
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1373
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1374
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1375
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1376
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1377
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1378
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1379
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1380
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1381
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1382
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1383
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1384
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1385
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1386
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1387
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1388
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1389
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1390
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1391
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1392
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1393
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1394
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1395
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1396
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1397
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1398
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1399
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1400
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1401
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1402
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1403
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1404
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1405
      "\xd6\x86"         => "\xd5\x96",
1406
      "\xd6\x85"         => "\xd5\x95",
1407
      "\xd6\x84"         => "\xd5\x94",
1408
      "\xd6\x83"         => "\xd5\x93",
1409
      "\xd6\x82"         => "\xd5\x92",
1410
      "\xd6\x81"         => "\xd5\x91",
1411
      "\xd6\x80"         => "\xd5\x90",
1412
      "\xd5\xbf"         => "\xd5\x8f",
1413
      "\xd5\xbe"         => "\xd5\x8e",
1414
      "\xd5\xbd"         => "\xd5\x8d",
1415
      "\xd5\xbc"         => "\xd5\x8c",
1416
      "\xd5\xbb"         => "\xd5\x8b",
1417
      "\xd5\xba"         => "\xd5\x8a",
1418
      "\xd5\xb9"         => "\xd5\x89",
1419
      "\xd5\xb8"         => "\xd5\x88",
1420
      "\xd5\xb7"         => "\xd5\x87",
1421
      "\xd5\xb6"         => "\xd5\x86",
1422
      "\xd5\xb5"         => "\xd5\x85",
1423
      "\xd5\xb4"         => "\xd5\x84",
1424
      "\xd5\xb3"         => "\xd5\x83",
1425
      "\xd5\xb2"         => "\xd5\x82",
1426
      "\xd5\xb1"         => "\xd5\x81",
1427
      "\xd5\xb0"         => "\xd5\x80",
1428
      "\xd5\xaf"         => "\xd4\xbf",
1429
      "\xd5\xae"         => "\xd4\xbe",
1430
      "\xd5\xad"         => "\xd4\xbd",
1431
      "\xd5\xac"         => "\xd4\xbc",
1432
      "\xd5\xab"         => "\xd4\xbb",
1433
      "\xd5\xaa"         => "\xd4\xba",
1434
      "\xd5\xa9"         => "\xd4\xb9",
1435
      "\xd5\xa8"         => "\xd4\xb8",
1436
      "\xd5\xa7"         => "\xd4\xb7",
1437
      "\xd5\xa6"         => "\xd4\xb6",
1438
      "\xd5\xa5"         => "\xd4\xb5",
1439
      "\xd5\xa4"         => "\xd4\xb4",
1440
      "\xd5\xa3"         => "\xd4\xb3",
1441
      "\xd5\xa2"         => "\xd4\xb2",
1442
      "\xd5\xa1"         => "\xd4\xb1",
1443
      "\xd4\xa5"         => "\xd4\xa4",
1444
      "\xd4\xa3"         => "\xd4\xa2",
1445
      "\xd4\xa1"         => "\xd4\xa0",
1446
      "\xd4\x9f"         => "\xd4\x9e",
1447
      "\xd4\x9d"         => "\xd4\x9c",
1448
      "\xd4\x9b"         => "\xd4\x9a",
1449
      "\xd4\x99"         => "\xd4\x98",
1450
      "\xd4\x97"         => "\xd4\x96",
1451
      "\xd4\x95"         => "\xd4\x94",
1452
      "\xd4\x93"         => "\xd4\x92",
1453
      "\xd4\x91"         => "\xd4\x90",
1454
      "\xd4\x8f"         => "\xd4\x8e",
1455
      "\xd4\x8d"         => "\xd4\x8c",
1456
      "\xd4\x8b"         => "\xd4\x8a",
1457
      "\xd4\x89"         => "\xd4\x88",
1458
      "\xd4\x87"         => "\xd4\x86",
1459
      "\xd4\x85"         => "\xd4\x84",
1460
      "\xd4\x83"         => "\xd4\x82",
1461
      "\xd4\x81"         => "\xd4\x80",
1462
      "\xd3\xbf"         => "\xd3\xbe",
1463
      "\xd3\xbd"         => "\xd3\xbc",
1464
      "\xd3\xbb"         => "\xd3\xba",
1465
      "\xd3\xb9"         => "\xd3\xb8",
1466
      "\xd3\xb7"         => "\xd3\xb6",
1467
      "\xd3\xb5"         => "\xd3\xb4",
1468
      "\xd3\xb3"         => "\xd3\xb2",
1469
      "\xd3\xb1"         => "\xd3\xb0",
1470
      "\xd3\xaf"         => "\xd3\xae",
1471
      "\xd3\xad"         => "\xd3\xac",
1472
      "\xd3\xab"         => "\xd3\xaa",
1473
      "\xd3\xa9"         => "\xd3\xa8",
1474
      "\xd3\xa7"         => "\xd3\xa6",
1475
      "\xd3\xa5"         => "\xd3\xa4",
1476
      "\xd3\xa3"         => "\xd3\xa2",
1477
      "\xd3\xa1"         => "\xd3\xa0",
1478
      "\xd3\x9f"         => "\xd3\x9e",
1479
      "\xd3\x9d"         => "\xd3\x9c",
1480
      "\xd3\x9b"         => "\xd3\x9a",
1481
      "\xd3\x99"         => "\xd3\x98",
1482
      "\xd3\x97"         => "\xd3\x96",
1483
      "\xd3\x95"         => "\xd3\x94",
1484
      "\xd3\x93"         => "\xd3\x92",
1485
      "\xd3\x91"         => "\xd3\x90",
1486
      "\xd3\x8f"         => "\xd3\x80",
1487
      "\xd3\x8e"         => "\xd3\x8d",
1488
      "\xd3\x8c"         => "\xd3\x8b",
1489
      "\xd3\x8a"         => "\xd3\x89",
1490
      "\xd3\x88"         => "\xd3\x87",
1491
      "\xd3\x86"         => "\xd3\x85",
1492
      "\xd3\x84"         => "\xd3\x83",
1493
      "\xd3\x82"         => "\xd3\x81",
1494
      "\xd2\xbf"         => "\xd2\xbe",
1495
      "\xd2\xbd"         => "\xd2\xbc",
1496
      "\xd2\xbb"         => "\xd2\xba",
1497
      "\xd2\xb9"         => "\xd2\xb8",
1498
      "\xd2\xb7"         => "\xd2\xb6",
1499
      "\xd2\xb5"         => "\xd2\xb4",
1500
      "\xd2\xb3"         => "\xd2\xb2",
1501
      "\xd2\xb1"         => "\xd2\xb0",
1502
      "\xd2\xaf"         => "\xd2\xae",
1503
      "\xd2\xad"         => "\xd2\xac",
1504
      "\xd2\xab"         => "\xd2\xaa",
1505
      "\xd2\xa9"         => "\xd2\xa8",
1506
      "\xd2\xa7"         => "\xd2\xa6",
1507
      "\xd2\xa5"         => "\xd2\xa4",
1508
      "\xd2\xa3"         => "\xd2\xa2",
1509
      "\xd2\xa1"         => "\xd2\xa0",
1510
      "\xd2\x9f"         => "\xd2\x9e",
1511
      "\xd2\x9d"         => "\xd2\x9c",
1512
      "\xd2\x9b"         => "\xd2\x9a",
1513
      "\xd2\x99"         => "\xd2\x98",
1514
      "\xd2\x97"         => "\xd2\x96",
1515
      "\xd2\x95"         => "\xd2\x94",
1516
      "\xd2\x93"         => "\xd2\x92",
1517
      "\xd2\x91"         => "\xd2\x90",
1518
      "\xd2\x8f"         => "\xd2\x8e",
1519
      "\xd2\x8d"         => "\xd2\x8c",
1520
      "\xd2\x8b"         => "\xd2\x8a",
1521
      "\xd2\x81"         => "\xd2\x80",
1522
      "\xd1\xbf"         => "\xd1\xbe",
1523
      "\xd1\xbd"         => "\xd1\xbc",
1524
      "\xd1\xbb"         => "\xd1\xba",
1525
      "\xd1\xb9"         => "\xd1\xb8",
1526
      "\xd1\xb7"         => "\xd1\xb6",
1527
      "\xd1\xb5"         => "\xd1\xb4",
1528
      "\xd1\xb3"         => "\xd1\xb2",
1529
      "\xd1\xb1"         => "\xd1\xb0",
1530
      "\xd1\xaf"         => "\xd1\xae",
1531
      "\xd1\xad"         => "\xd1\xac",
1532
      "\xd1\xab"         => "\xd1\xaa",
1533
      "\xd1\xa9"         => "\xd1\xa8",
1534
      "\xd1\xa7"         => "\xd1\xa6",
1535
      "\xd1\xa5"         => "\xd1\xa4",
1536
      "\xd1\xa3"         => "\xd1\xa2",
1537
      "\xd1\xa1"         => "\xd1\xa0",
1538
      "\xd1\x9f"         => "\xd0\x8f",
1539
      "\xd1\x9e"         => "\xd0\x8e",
1540
      "\xd1\x9d"         => "\xd0\x8d",
1541
      "\xd1\x9c"         => "\xd0\x8c",
1542
      "\xd1\x9b"         => "\xd0\x8b",
1543
      "\xd1\x9a"         => "\xd0\x8a",
1544
      "\xd1\x99"         => "\xd0\x89",
1545
      "\xd1\x98"         => "\xd0\x88",
1546
      "\xd1\x97"         => "\xd0\x87",
1547
      "\xd1\x96"         => "\xd0\x86",
1548
      "\xd1\x95"         => "\xd0\x85",
1549
      "\xd1\x94"         => "\xd0\x84",
1550
      "\xd1\x93"         => "\xd0\x83",
1551
      "\xd1\x92"         => "\xd0\x82",
1552
      "\xd1\x91"         => "\xd0\x81",
1553
      "\xd1\x90"         => "\xd0\x80",
1554
      "\xd1\x8f"         => "\xd0\xaf",
1555
      "\xd1\x8e"         => "\xd0\xae",
1556
      "\xd1\x8d"         => "\xd0\xad",
1557
      "\xd1\x8c"         => "\xd0\xac",
1558
      "\xd1\x8b"         => "\xd0\xab",
1559
      "\xd1\x8a"         => "\xd0\xaa",
1560
      "\xd1\x89"         => "\xd0\xa9",
1561
      "\xd1\x88"         => "\xd0\xa8",
1562
      "\xd1\x87"         => "\xd0\xa7",
1563
      "\xd1\x86"         => "\xd0\xa6",
1564
      "\xd1\x85"         => "\xd0\xa5",
1565
      "\xd1\x84"         => "\xd0\xa4",
1566
      "\xd1\x83"         => "\xd0\xa3",
1567
      "\xd1\x82"         => "\xd0\xa2",
1568
      "\xd1\x81"         => "\xd0\xa1",
1569
      "\xd1\x80"         => "\xd0\xa0",
1570
      "\xd0\xbf"         => "\xd0\x9f",
1571
      "\xd0\xbe"         => "\xd0\x9e",
1572
      "\xd0\xbd"         => "\xd0\x9d",
1573
      "\xd0\xbc"         => "\xd0\x9c",
1574
      "\xd0\xbb"         => "\xd0\x9b",
1575
      "\xd0\xba"         => "\xd0\x9a",
1576
      "\xd0\xb9"         => "\xd0\x99",
1577
      "\xd0\xb8"         => "\xd0\x98",
1578
      "\xd0\xb7"         => "\xd0\x97",
1579
      "\xd0\xb6"         => "\xd0\x96",
1580
      "\xd0\xb5"         => "\xd0\x95",
1581
      "\xd0\xb4"         => "\xd0\x94",
1582
      "\xd0\xb3"         => "\xd0\x93",
1583
      "\xd0\xb2"         => "\xd0\x92",
1584
      "\xd0\xb1"         => "\xd0\x91",
1585
      "\xd0\xb0"         => "\xd0\x90",
1586
      "\xcf\xbb"         => "\xcf\xba",
1587
      "\xcf\xb8"         => "\xcf\xb7",
1588
      "\xcf\xb5"         => "\xce\x95",
1589
      "\xcf\xb2"         => "\xcf\xb9",
1590
      "\xcf\xb1"         => "\xce\xa1",
1591
      "\xcf\xb0"         => "\xce\x9a",
1592
      "\xcf\xaf"         => "\xcf\xae",
1593
      "\xcf\xad"         => "\xcf\xac",
1594
      "\xcf\xab"         => "\xcf\xaa",
1595
      "\xcf\xa9"         => "\xcf\xa8",
1596
      "\xcf\xa7"         => "\xcf\xa6",
1597
      "\xcf\xa5"         => "\xcf\xa4",
1598
      "\xcf\xa3"         => "\xcf\xa2",
1599
      "\xcf\xa1"         => "\xcf\xa0",
1600
      "\xcf\x9f"         => "\xcf\x9e",
1601
      "\xcf\x9d"         => "\xcf\x9c",
1602
      "\xcf\x9b"         => "\xcf\x9a",
1603
      "\xcf\x99"         => "\xcf\x98",
1604
      "\xcf\x97"         => "\xcf\x8f",
1605
      "\xcf\x96"         => "\xce\xa0",
1606
      "\xcf\x95"         => "\xce\xa6",
1607
      "\xcf\x91"         => "\xce\x98",
1608
      "\xcf\x90"         => "\xce\x92",
1609
      "\xcf\x8e"         => "\xce\x8f",
1610
      "\xcf\x8d"         => "\xce\x8e",
1611
      "\xcf\x8c"         => "\xce\x8c",
1612
      "\xcf\x8b"         => "\xce\xab",
1613
      "\xcf\x8a"         => "\xce\xaa",
1614
      "\xcf\x89"         => "\xce\xa9",
1615
      "\xcf\x88"         => "\xce\xa8",
1616
      "\xcf\x87"         => "\xce\xa7",
1617
      "\xcf\x86"         => "\xce\xa6",
1618
      "\xcf\x85"         => "\xce\xa5",
1619
      "\xcf\x84"         => "\xce\xa4",
1620
      "\xcf\x83"         => "\xce\xa3",
1621
      "\xcf\x82"         => "\xce\xa3",
1622
      "\xcf\x81"         => "\xce\xa1",
1623
      "\xcf\x80"         => "\xce\xa0",
1624
      "\xce\xbf"         => "\xce\x9f",
1625
      "\xce\xbe"         => "\xce\x9e",
1626
      "\xce\xbd"         => "\xce\x9d",
1627
      "\xce\xbc"         => "\xce\x9c",
1628
      "\xce\xbb"         => "\xce\x9b",
1629
      "\xce\xba"         => "\xce\x9a",
1630
      "\xce\xb9"         => "\xce\x99",
1631
      "\xce\xb8"         => "\xce\x98",
1632
      "\xce\xb7"         => "\xce\x97",
1633
      "\xce\xb6"         => "\xce\x96",
1634
      "\xce\xb5"         => "\xce\x95",
1635
      "\xce\xb4"         => "\xce\x94",
1636
      "\xce\xb3"         => "\xce\x93",
1637
      "\xce\xb2"         => "\xce\x92",
1638
      "\xce\xb1"         => "\xce\x91",
1639
      "\xce\xaf"         => "\xce\x8a",
1640
      "\xce\xae"         => "\xce\x89",
1641
      "\xce\xad"         => "\xce\x88",
1642
      "\xce\xac"         => "\xce\x86",
1643
      "\xcd\xbd"         => "\xcf\xbf",
1644
      "\xcd\xbc"         => "\xcf\xbe",
1645
      "\xcd\xbb"         => "\xcf\xbd",
1646
      "\xcd\xb7"         => "\xcd\xb6",
1647
      "\xcd\xb3"         => "\xcd\xb2",
1648
      "\xcd\xb1"         => "\xcd\xb0",
1649
      "\xca\x92"         => "\xc6\xb7",
1650
      "\xca\x8c"         => "\xc9\x85",
1651
      "\xca\x8b"         => "\xc6\xb2",
1652
      "\xca\x8a"         => "\xc6\xb1",
1653
      "\xca\x89"         => "\xc9\x84",
1654
      "\xca\x88"         => "\xc6\xae",
1655
      "\xca\x83"         => "\xc6\xa9",
1656
      "\xca\x80"         => "\xc6\xa6",
1657
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1658
      "\xc9\xb5"         => "\xc6\x9f",
1659
      "\xc9\xb2"         => "\xc6\x9d",
1660
      "\xc9\xb1"         => "\xe2\xb1\xae",
1661
      "\xc9\xaf"         => "\xc6\x9c",
1662
      "\xc9\xab"         => "\xe2\xb1\xa2",
1663
      "\xc9\xa9"         => "\xc6\x96",
1664
      "\xc9\xa8"         => "\xc6\x97",
1665
      "\xc9\xa5"         => "\xea\x9e\x8d",
1666
      "\xc9\xa3"         => "\xc6\x94",
1667
      "\xc9\xa0"         => "\xc6\x93",
1668
      "\xc9\x9b"         => "\xc6\x90",
1669
      "\xc9\x99"         => "\xc6\x8f",
1670
      "\xc9\x97"         => "\xc6\x8a",
1671
      "\xc9\x96"         => "\xc6\x89",
1672
      "\xc9\x94"         => "\xc6\x86",
1673
      "\xc9\x93"         => "\xc6\x81",
1674
      "\xc9\x92"         => "\xe2\xb1\xb0",
1675
      "\xc9\x91"         => "\xe2\xb1\xad",
1676
      "\xc9\x90"         => "\xe2\xb1\xaf",
1677
      "\xc9\x8f"         => "\xc9\x8e",
1678
      "\xc9\x8d"         => "\xc9\x8c",
1679
      "\xc9\x8b"         => "\xc9\x8a",
1680
      "\xc9\x89"         => "\xc9\x88",
1681
      "\xc9\x87"         => "\xc9\x86",
1682
      "\xc9\x82"         => "\xc9\x81",
1683
      "\xc9\x80"         => "\xe2\xb1\xbf",
1684
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1685
      "\xc8\xbc"         => "\xc8\xbb",
1686
      "\xc8\xb3"         => "\xc8\xb2",
1687
      "\xc8\xb1"         => "\xc8\xb0",
1688
      "\xc8\xaf"         => "\xc8\xae",
1689
      "\xc8\xad"         => "\xc8\xac",
1690
      "\xc8\xab"         => "\xc8\xaa",
1691
      "\xc8\xa9"         => "\xc8\xa8",
1692
      "\xc8\xa7"         => "\xc8\xa6",
1693
      "\xc8\xa5"         => "\xc8\xa4",
1694
      "\xc8\xa3"         => "\xc8\xa2",
1695
      "\xc8\x9f"         => "\xc8\x9e",
1696
      "\xc8\x9d"         => "\xc8\x9c",
1697
      "\xc8\x9b"         => "\xc8\x9a",
1698
      "\xc8\x99"         => "\xc8\x98",
1699
      "\xc8\x97"         => "\xc8\x96",
1700
      "\xc8\x95"         => "\xc8\x94",
1701
      "\xc8\x93"         => "\xc8\x92",
1702
      "\xc8\x91"         => "\xc8\x90",
1703
      "\xc8\x8f"         => "\xc8\x8e",
1704
      "\xc8\x8d"         => "\xc8\x8c",
1705
      "\xc8\x8b"         => "\xc8\x8a",
1706
      "\xc8\x89"         => "\xc8\x88",
1707
      "\xc8\x87"         => "\xc8\x86",
1708
      "\xc8\x85"         => "\xc8\x84",
1709
      "\xc8\x83"         => "\xc8\x82",
1710
      "\xc8\x81"         => "\xc8\x80",
1711
      "\xc7\xbf"         => "\xc7\xbe",
1712
      "\xc7\xbd"         => "\xc7\xbc",
1713
      "\xc7\xbb"         => "\xc7\xba",
1714
      "\xc7\xb9"         => "\xc7\xb8",
1715
      "\xc7\xb5"         => "\xc7\xb4",
1716
      "\xc7\xb3"         => "\xc7\xb2",
1717
      "\xc7\xaf"         => "\xc7\xae",
1718
      "\xc7\xad"         => "\xc7\xac",
1719
      "\xc7\xab"         => "\xc7\xaa",
1720
      "\xc7\xa9"         => "\xc7\xa8",
1721
      "\xc7\xa7"         => "\xc7\xa6",
1722
      "\xc7\xa5"         => "\xc7\xa4",
1723
      "\xc7\xa3"         => "\xc7\xa2",
1724
      "\xc7\xa1"         => "\xc7\xa0",
1725
      "\xc7\x9f"         => "\xc7\x9e",
1726
      "\xc7\x9d"         => "\xc6\x8e",
1727
      "\xc7\x9c"         => "\xc7\x9b",
1728
      "\xc7\x9a"         => "\xc7\x99",
1729
      "\xc7\x98"         => "\xc7\x97",
1730
      "\xc7\x96"         => "\xc7\x95",
1731
      "\xc7\x94"         => "\xc7\x93",
1732
      "\xc7\x92"         => "\xc7\x91",
1733
      "\xc7\x90"         => "\xc7\x8f",
1734
      "\xc7\x8e"         => "\xc7\x8d",
1735
      "\xc7\x8c"         => "\xc7\x8b",
1736
      "\xc7\x89"         => "\xc7\x88",
1737
      "\xc7\x86"         => "\xc7\x85",
1738
      "\xc6\xbf"         => "\xc7\xb7",
1739
      "\xc6\xbd"         => "\xc6\xbc",
1740
      "\xc6\xb9"         => "\xc6\xb8",
1741
      "\xc6\xb6"         => "\xc6\xb5",
1742
      "\xc6\xb4"         => "\xc6\xb3",
1743
      "\xc6\xb0"         => "\xc6\xaf",
1744
      "\xc6\xad"         => "\xc6\xac",
1745
      "\xc6\xa8"         => "\xc6\xa7",
1746
      "\xc6\xa5"         => "\xc6\xa4",
1747
      "\xc6\xa3"         => "\xc6\xa2",
1748
      "\xc6\xa1"         => "\xc6\xa0",
1749
      "\xc6\x9e"         => "\xc8\xa0",
1750
      "\xc6\x9a"         => "\xc8\xbd",
1751
      "\xc6\x99"         => "\xc6\x98",
1752
      "\xc6\x95"         => "\xc7\xb6",
1753
      "\xc6\x92"         => "\xc6\x91",
1754
      "\xc6\x8c"         => "\xc6\x8b",
1755
      "\xc6\x88"         => "\xc6\x87",
1756
      "\xc6\x85"         => "\xc6\x84",
1757
      "\xc6\x83"         => "\xc6\x82",
1758
      "\xc6\x80"         => "\xc9\x83",
1759
      "\xc5\xbf"         => "\x53",
1760
      "\xc5\xbe"         => "\xc5\xbd",
1761
      "\xc5\xbc"         => "\xc5\xbb",
1762
      "\xc5\xba"         => "\xc5\xb9",
1763
      "\xc5\xb7"         => "\xc5\xb6",
1764
      "\xc5\xb5"         => "\xc5\xb4",
1765
      "\xc5\xb3"         => "\xc5\xb2",
1766
      "\xc5\xb1"         => "\xc5\xb0",
1767
      "\xc5\xaf"         => "\xc5\xae",
1768
      "\xc5\xad"         => "\xc5\xac",
1769
      "\xc5\xab"         => "\xc5\xaa",
1770
      "\xc5\xa9"         => "\xc5\xa8",
1771
      "\xc5\xa7"         => "\xc5\xa6",
1772
      "\xc5\xa5"         => "\xc5\xa4",
1773
      "\xc5\xa3"         => "\xc5\xa2",
1774
      "\xc5\xa1"         => "\xc5\xa0",
1775
      "\xc5\x9f"         => "\xc5\x9e",
1776
      "\xc5\x9d"         => "\xc5\x9c",
1777
      "\xc5\x9b"         => "\xc5\x9a",
1778
      "\xc5\x99"         => "\xc5\x98",
1779
      "\xc5\x97"         => "\xc5\x96",
1780
      "\xc5\x95"         => "\xc5\x94",
1781
      "\xc5\x93"         => "\xc5\x92",
1782
      "\xc5\x91"         => "\xc5\x90",
1783
      "\xc5\x8f"         => "\xc5\x8e",
1784
      "\xc5\x8d"         => "\xc5\x8c",
1785
      "\xc5\x8b"         => "\xc5\x8a",
1786
      "\xc5\x88"         => "\xc5\x87",
1787
      "\xc5\x86"         => "\xc5\x85",
1788
      "\xc5\x84"         => "\xc5\x83",
1789
      "\xc5\x82"         => "\xc5\x81",
1790
      "\xc5\x80"         => "\xc4\xbf",
1791
      "\xc4\xbe"         => "\xc4\xbd",
1792
      "\xc4\xbc"         => "\xc4\xbb",
1793
      "\xc4\xba"         => "\xc4\xb9",
1794
      "\xc4\xb7"         => "\xc4\xb6",
1795
      "\xc4\xb5"         => "\xc4\xb4",
1796
      "\xc4\xb3"         => "\xc4\xb2",
1797
      "\xc4\xb1"         => "\x49",
1798
      "\xc4\xaf"         => "\xc4\xae",
1799
      "\xc4\xad"         => "\xc4\xac",
1800
      "\xc4\xab"         => "\xc4\xaa",
1801
      "\xc4\xa9"         => "\xc4\xa8",
1802
      "\xc4\xa7"         => "\xc4\xa6",
1803
      "\xc4\xa5"         => "\xc4\xa4",
1804
      "\xc4\xa3"         => "\xc4\xa2",
1805
      "\xc4\xa1"         => "\xc4\xa0",
1806
      "\xc4\x9f"         => "\xc4\x9e",
1807
      "\xc4\x9d"         => "\xc4\x9c",
1808
      "\xc4\x9b"         => "\xc4\x9a",
1809
      "\xc4\x99"         => "\xc4\x98",
1810
      "\xc4\x97"         => "\xc4\x96",
1811
      "\xc4\x95"         => "\xc4\x94",
1812
      "\xc4\x93"         => "\xc4\x92",
1813
      "\xc4\x91"         => "\xc4\x90",
1814
      "\xc4\x8f"         => "\xc4\x8e",
1815
      "\xc4\x8d"         => "\xc4\x8c",
1816
      "\xc4\x8b"         => "\xc4\x8a",
1817
      "\xc4\x89"         => "\xc4\x88",
1818
      "\xc4\x87"         => "\xc4\x86",
1819
      "\xc4\x85"         => "\xc4\x84",
1820
      "\xc4\x83"         => "\xc4\x82",
1821
      "\xc4\x81"         => "\xc4\x80",
1822
      "\xc3\xbf"         => "\xc5\xb8",
1823
      "\xc3\xbe"         => "\xc3\x9e",
1824
      "\xc3\xbd"         => "\xc3\x9d",
1825
      "\xc3\xbc"         => "\xc3\x9c",
1826
      "\xc3\xbb"         => "\xc3\x9b",
1827
      "\xc3\xba"         => "\xc3\x9a",
1828
      "\xc3\xb9"         => "\xc3\x99",
1829
      "\xc3\xb8"         => "\xc3\x98",
1830
      "\xc3\xb6"         => "\xc3\x96",
1831
      "\xc3\xb5"         => "\xc3\x95",
1832
      "\xc3\xb4"         => "\xc3\x94",
1833
      "\xc3\xb3"         => "\xc3\x93",
1834
      "\xc3\xb2"         => "\xc3\x92",
1835
      "\xc3\xb1"         => "\xc3\x91",
1836
      "\xc3\xb0"         => "\xc3\x90",
1837
      "\xc3\xaf"         => "\xc3\x8f",
1838
      "\xc3\xae"         => "\xc3\x8e",
1839
      "\xc3\xad"         => "\xc3\x8d",
1840
      "\xc3\xac"         => "\xc3\x8c",
1841
      "\xc3\xab"         => "\xc3\x8b",
1842
      "\xc3\xaa"         => "\xc3\x8a",
1843
      "\xc3\xa9"         => "\xc3\x89",
1844
      "\xc3\xa8"         => "\xc3\x88",
1845
      "\xc3\xa7"         => "\xc3\x87",
1846
      "\xc3\xa6"         => "\xc3\x86",
1847
      "\xc3\xa5"         => "\xc3\x85",
1848
      "\xc3\xa4"         => "\xc3\x84",
1849
      "\xc3\xa3"         => "\xc3\x83",
1850
      "\xc3\xa2"         => "\xc3\x82",
1851
      "\xc3\xa1"         => "\xc3\x81",
1852
      "\xc3\xa0"         => "\xc3\x80",
1853
      "\xc2\xb5"         => "\xce\x9c",
1854
      "\x7a"             => "\x5a",
1855
      "\x79"             => "\x59",
1856
      "\x78"             => "\x58",
1857
      "\x77"             => "\x57",
1858
      "\x76"             => "\x56",
1859
      "\x75"             => "\x55",
1860
      "\x74"             => "\x54",
1861
      "\x73"             => "\x53",
1862
      "\x72"             => "\x52",
1863
      "\x71"             => "\x51",
1864
      "\x70"             => "\x50",
1865
      "\x6f"             => "\x4f",
1866
      "\x6e"             => "\x4e",
1867
      "\x6d"             => "\x4d",
1868
      "\x6c"             => "\x4c",
1869
      "\x6b"             => "\x4b",
1870
      "\x6a"             => "\x4a",
1871
      "\x69"             => "\x49",
1872
      "\x68"             => "\x48",
1873
      "\x67"             => "\x47",
1874
      "\x66"             => "\x46",
1875
      "\x65"             => "\x45",
1876
      "\x64"             => "\x44",
1877
      "\x63"             => "\x43",
1878
      "\x62"             => "\x42",
1879
      "\x61"             => "\x41",
1880
1881
    );
1882
1883
    return $case;
1884
  }
1885
1886
  /**
   * Detects which UTF-8 related features the current environment offers.
   *
   * The results are written to self::$support. The key "already_checked_via_portable_utf8"
   * marks the detection as done, so every later call returns without doing anything.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      // the detection already ran
      return;
    }

    // set the marker first, then collect the individual feature flags
    self::$support['already_checked_via_portable_utf8'] = true;

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1904
1905
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * When "IntlChar" support was detected, the argument is handed to \IntlChar::chr() unchanged.
   * Otherwise a value that is not already an integer is converted via self::hex_to_int() and
   * the character is built by decoding the numeric HTML entity of the resulting code point.
   *
   * @param    int|string $code_point The code point for which to generate a character.
   *
   * @return   string|null Multi-Byte character, returns null on failure to encode.
   */
  public static function chr($code_point)
  {
    $codePointAsInt = (int)$code_point;

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // prefer the intl implementation if it is available
    if (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // anything that is not a plain integer is resolved by hex_to_int()
    if ($codePointAsInt === $code_point) {
      $resolved = $codePointAsInt;
    } else {
      $resolved = self::hex_to_int($code_point);
    }

    // NOTE: loose check on purpose, a failed conversion (false) and the code point 0 both end up as null
    if ($resolved) {
      return self::html_entity_decode("&#{$resolved};", ENT_QUOTES);
    }

    return null;
  }
1937
1938
  /**
   * Applies a callback to every character of a string.
   *
   * The string is cut into characters via self::split() and the callback is then
   * invoked once per character.
   *
   * @param  string|array $callback The callback function.
   * @param  string       $str      UTF-8 string to run callback on.
   *
   * @return array The outcome of callback, one entry per character.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1952
1953
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character, an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // compare with the empty string explicitly: a loose "!$str" check would also
    // discard the valid one-character input "0"
    if ($str === '') {
      return array();
    }

    // strlen() counts bytes, so it yields the byte length of every split character
    return array_map('strlen', self::split($str));
  }
1973
1974
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are decoded (1 to 4); the payload bits of the
   * following bytes are appended six bits at a time. The input is not validated.
   *
   * @param   string $char The input character
   *
   * @return  int The decoded code point, 0 for an empty input.
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // nothing to decode: reading $char[0] of an empty string is an uninitialized string offset
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // strip the length marker from the lead byte and remember the sequence length
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
    }

    return $code;
  }
2013
2014
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The code point is determined via self::ord() and formatted via self::int_to_hex().
   *
   * @param    string $char The input character
   * @param    string $pfix The prefix that is passed on to self::int_to_hex().
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
2026
2027
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending character.
   *
   * INFO: the chunks are glued together with $end, so $end is placed between
   *       the chunks and is not appended after the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
2040
2041
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, in this order: drop invalid bytes, remove the diamond question mark,
   * remove invisible characters, then the optional normalizations.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to strip the BOM via self::removeBOM()
   * @param bool   $normalize_whitespace    set true, to run self::normalize_whitespace()
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences, the other two alternatives
    // match a single stray byte each
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // every match is replaced by group 1 only, so the stray bytes vanish
    $cleaned = preg_replace($pattern, '$1', $str);

    $cleaned = self::remove_invisible_characters(
        self::replace_diamond_question_mark($cleaned, '')
    );

    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::removeBOM($cleaned);
    }

    return $cleaned;
  }
2087
2088
  /**
   * Cleans up a string: fixes the UTF-8 encoding and keeps only clean UTF-8 at the end.
   *
   * @param string $str
   *
   * @return string The cleaned string, an empty string for an empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // clean() with these flags:
    // - remove_bom              => true
    // - normalize_whitespace    => true
    // - normalize_msword        => false
    // - keep_non_breaking_space => true
    // (it also drops non UTF-8 bytes, the diamond question mark and invisible characters)
    return (string)self::clean($fixed, true, true, false, true);
  }
2115
2116
  /**
   * Accepts a string or a array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string is first cut into characters via self::split(); every element is then
   * mapped through UTF8::ord() and, if requested, through UTF8::int_to_hex().
   *
   * @param    string|string[] $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool            $u_style If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $codePoints = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      // plain integers were requested
      return $codePoints;
    }

    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $codePoints);
  }
2153
2154
  /**
   * Returns count of characters used in a string.
   *
   * @param    string $str       The input string.
   * @param    bool   $cleanUtf8 Clean non UTF-8 chars from the string (passed on to self::split()).
   *
   * @return   array An associative array of Character as keys and
   *           their count as values.
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // one array element per character
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
2167
2168
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity, which is then converted to UTF-8.
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    // e.g. 65 => "&#x41;"
    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2183
2184
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The input is returned unchanged if the string or the encoding is empty, if no encoding
   * could be detected, if the detected encoding already is the target encoding (and $force is
   * not true) or if the generic conversion yields an empty result.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
   *                         otherwise we auto-detect the current string-encoding
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $encodingDetected = self::str_detect_encoding($str);
    $forced = ($force === true);

    // nothing detected, or nothing to do because the string already has the target encoding
    if (!$encodingDetected || (!$forced && $encodingDetected === $encoding)) {
      return $str;
    }

    // target UTF-8: use the own converter for the encodings it is meant for
    if (
        $encoding === 'UTF-8'
        &&
        (
            $forced
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // target ISO-8859-1: handled by to_win1252()
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $forced
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_win1252($str);
    }

    // every other combination goes through mbstring
    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // fall back to the input if the conversion gave nothing usable
    return $strEncoded ? $strEncoded : $str;
  }
2260
2261
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read, it is passed through
   *                                     filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   *                                     </p>
   * @param int|null      $flags         [optional] <p>
   *                                     Passed on to the native file_get_contents(), see the link above.
   *                                     Flags can be joined with the binary OR (|) operator:
   *                                     </p>
   *                                     <p>
   *                                     FILE_USE_INCLUDE_PATH: Search for filename in the include directory.<br />
   *                                     FILE_TEXT: Read the data as text, cannot be used with FILE_BINARY.<br />
   *                                     FILE_BINARY: Read the file in binary mode, this is the default
   *                                     setting and cannot be used with FILE_TEXT.
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with stream_context_create.
   *                                     If it is null and $timeout is not 0, a context with the
   *                                     "http" => "timeout" option is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts.
   *                                     </p>
   * @param int|null      $maxlen        [optional] <p>
   *                                     Maximum length of data read. The default is to read until end
   *                                     of file is reached, the value is only used if it is an integer.
   *                                     </p>
   * @param int           $timeout       The "http" timeout for the auto-created context, see $context.
   *
   * @param boolean       $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used
   *                                     non default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // no context given: create one that only carries the http-timeout
    if ($timeout && $context === null) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    // only pass $maxlen on if it is a real integer
    $data = is_int($maxlen)
        ? file_get_contents($filename, $flags, $context, $offset, $maxlen)
        : file_get_contents($filename, $flags, $context, $offset);

    // false on error, or the raw data if no conversion was requested
    if ($data === false || $convertToUtf8 !== true) {
      return $data;
    }

    // convert to UTF-8 (without forcing) and clean the result
    return self::cleanup(self::encode('UTF-8', $data, false));
  }
2379
2380
  /**
   * Checks if a file starts with a BOM (Byte Order Mark).
   *
   * The file content is read and handed to UTF8::string_has_bom().
   *
   * @param    string $file_path Path to a readable file.
   *
   * @return   bool True if the file content has a BOM at the start, false otherwise
   *                (also false when the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // An unreadable file yields false; do not pass that on as if it were content.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
2391
2392
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are walked recursively (every element / property is
   * filtered in place); strings are normalized; every other type is returned
   * untouched.
   *
   * @param mixed  $var                The value to filter.
   * @param int    $normalization_form Form passed to the "Normalizer" (default: 4, NFC).
   * @param string $leading_combining  Prefix that is put in front of a string which
   *                                   would otherwise start with a combining mark.
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure 7-bit strings need no further treatment
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder so that the isset($n[0]) check below succeeds
      $n = '-';
    } else {
      $n = \Normalizer::normalize($var, $normalization_form);

      // no usable normalizer result: re-encode the input as UTF-8 instead
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    if ($var[0] >= "\x80" && isset($n[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
2444
2445
  /**
   * "filter_input()"-wrapper: the fetched value is passed through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param int    $type   Input type, as for the native "filter_input()".
   * @param string $var    Name of the variable to get.
   * @param int    $filter Filter to apply.
   * @param mixed  $option Only forwarded when the caller passed a 4th argument.
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // forward $option only if it was really given by the caller
    $value = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($value);
  }
2465
2466
  /**
   * "filter_input_array()"-wrapper: the fetched values are passed through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param int   $type       Input type, as for the native "filter_input_array()".
   * @param mixed $definition Only forwarded when the caller passed at least 2 arguments.
   * @param bool  $add_empty  Only forwarded when the caller passed at least 2 arguments.
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with a single argument the native defaults are used
    $values = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
2485
2486
  /**
   * "filter_var()"-wrapper: the filtered value is passed through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param mixed $var    Value to filter.
   * @param int   $filter Filter to apply.
   * @param mixed $option Only forwarded when the caller passed a 3rd argument.
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // forward $option only if it was really given by the caller
    $value = func_num_args() < 3
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($value);
  }
2505
2506
  /**
   * "filter_var_array()"-wrapper: the filtered values are passed through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param array $data       Data to filter.
   * @param mixed $definition Only forwarded when the caller passed at least 2 arguments.
   * @param bool  $add_empty  Only forwarded when the caller passed at least 2 arguments.
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // with a single argument the native defaults are used
    $values = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
2525
2526
  /**
   * Check that a string is not longer than the given number of characters.
   *
   * The length is measured with UTF8::strlen().
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The maximum allowed length.
   *
   * @return   bool true if the length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2538
2539
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * Every key of the self::$brokenUtf8ToUtf8 table that occurs in the string is
   * replaced by its value.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * @param string $str
   *
   * @return string
   */
  public static function fix_simple_utf8($str)
  {
    // [0] => search strings, [1] => replacements; built once per request
    static $replacePairs = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($replacePairs === null) {
      $replacePairs = array(
          array_keys(self::$brokenUtf8ToUtf8),
          array_values(self::$brokenUtf8ToUtf8),
      );
    }

    return str_replace($replacePairs[0], $replacePairs[1], $str);
  }
2566
2567
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * The string is decoded and re-encoded (UTF8::utf8_decode() + UTF8::to_utf8())
   * again and again until a pass no longer changes it.
   *
   * @param string|string[] $str You can use a string or an array of strings.
   *
   * @return mixed
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      // fix every element on its own, keys and order are kept
      $fixed = array();
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous));
    }

    return $str;
  }
2595
2596
  /**
   * Get the text direction of a specific character.
   *
   * When self::$support reports "intlChar", IntlChar::charDirection() is asked first;
   * if its answer is not one of the direction classes listed below (or "intlChar"
   * is not available), a built-in table of code points decides.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // direction classes from the "IntlChar"-Class
      if (in_array($intlDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // everything outside of 0x5be ... 0x10b7f is treated as LTR
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      $rtlSingles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $rtlRanges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $rtlSingles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $rtlRanges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two tables (and not 0x200f)
      return 'LTR';

    }

    // single code points are compared strictly, ranges inclusively
    if (in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2716
  /**
   * get data from "/data/*.php"
   *
   * The file is looked up in the "data" directory next to this class and is
   * loaded via "require", so the result is whatever that file returns.
   *
   * @param string $file File name without directory and without ".php".
   *
   * @return bool|string|array|int false if the file does not exist
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2733
2734
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted are 4 to 6 hexadecimal digits (case-insensitive), optionally
   * prefixed with "\u" or "U+". Anything else is rejected.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int|false The code point, or false on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits are allowed: with a wider class such as [a-z0-9] an
    // input like "zzzz" would match and intval() would turn it into 0.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2751
2752
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * All arguments are handed over unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2767
2768
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param  string $str            The Unicode string to be encoded as numbered entities.
   * @param  bool   $keepAsciiChars Keep ASCII chars (only code points from 0x80 upwards are encoded).
   * @param  string $encoding       Encoding of $str; anything but 'UTF-8' is passed through
   *                                UTF8::normalize_encoding() first.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding);
      }

      // convmap: (first code point, last code point, offset, mask)
      // The range has to reach up to U+10FFFF; if it ends at 0xffff, characters
      // beyond the Basic Multilingual Plane (e.g. emoji) are left unencoded.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff),
          $encoding
      );
    }

    // fallback without "mbstring": encode char by char
    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
2809
2810
  /**
2811
   * UTF-8 version of html_entity_decode()
2812
   *
2813
   * The reason we are not using html_entity_decode() by itself is because
2814
   * while it is not technically correct to leave out the semicolon
2815
   * at the end of an entity most browsers will still interpret the entity
2816
   * correctly. html_entity_decode() does not convert entities without
2817
   * semicolons, so we are left with our own little solution here. Bummer.
2818
   *
2819
   * Convert all HTML entities to their applicable characters
2820
   *
2821
   * INFO: opposite to UTF8::html_encode()
2822
   *
2823
   * @link http://php.net/manual/en/function.html-entity-decode.php
2824
   *
2825
   * @param string $str      <p>
2826
   *                         The input string.
2827
   *                         </p>
2828
   * @param int    $flags    [optional] <p>
2829
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2830
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2831
   *                         <table>
2832
   *                         Available <i>flags</i> constants
2833
   *                         <tr valign="top">
2834
   *                         <td>Constant Name</td>
2835
   *                         <td>Description</td>
2836
   *                         </tr>
2837
   *                         <tr valign="top">
2838
   *                         <td><b>ENT_COMPAT</b></td>
2839
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2840
   *                         </tr>
2841
   *                         <tr valign="top">
2842
   *                         <td><b>ENT_QUOTES</b></td>
2843
   *                         <td>Will convert both double and single quotes.</td>
2844
   *                         </tr>
2845
   *                         <tr valign="top">
2846
   *                         <td><b>ENT_NOQUOTES</b></td>
2847
   *                         <td>Will leave both double and single quotes unconverted.</td>
2848
   *                         </tr>
2849
   *                         <tr valign="top">
2850
   *                         <td><b>ENT_HTML401</b></td>
2851
   *                         <td>
2852
   *                         Handle code as HTML 4.01.
2853
   *                         </td>
2854
   *                         </tr>
2855
   *                         <tr valign="top">
2856
   *                         <td><b>ENT_XML1</b></td>
2857
   *                         <td>
2858
   *                         Handle code as XML 1.
2859
   *                         </td>
2860
   *                         </tr>
2861
   *                         <tr valign="top">
2862
   *                         <td><b>ENT_XHTML</b></td>
2863
   *                         <td>
2864
   *                         Handle code as XHTML.
2865
   *                         </td>
2866
   *                         </tr>
2867
   *                         <tr valign="top">
2868
   *                         <td><b>ENT_HTML5</b></td>
2869
   *                         <td>
2870
   *                         Handle code as HTML 5.
2871
   *                         </td>
2872
   *                         </tr>
2873
   *                         </table>
2874
   *                         </p>
2875 17
   * @param string $encoding [optional] <p>
2876
   *                         Encoding to use.
2877 17
   *                         </p>
2878
   *
2879 17
   * @return string the decoded string.
2880 4
   */
2881
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to do for an empty string or a string without any "&"
    if ($str === '') {
      return '';
    }

    if (strpos($str, '&') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      // ENT_HTML5 is only used when Bootup::is_php('5.4') says so
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Decodes one decimal entity ("&#...;"); an entity that stands for a single
    // or double quote is returned unchanged, so that $flags decides about quotes.
    $decodeDecimalEntity = function ($matches) {
      $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // repeat until a pass does not change the string anymore (nested entities)
    do {
      $previous = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", $decodeDecimalEntity, $str);

      // add the missing semicolon to numeric entities, then
      // decode numeric & UTF16 two byte entities
      $withSemicolons = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS',
          '$1;',
          $str
      );
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2933
2934
  /**
2935
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2936
   *
2937
   * @link http://php.net/manual/en/function.htmlentities.php
2938
   *
2939
   * @param string $str           <p>
2940
   *                              The input string.
2941
   *                              </p>
2942
   * @param int    $flags         [optional] <p>
2943
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2944
   *                              invalid code unit sequences and the used document type. The default is
2945
   *                              ENT_COMPAT | ENT_HTML401.
2946
   *                              <table>
2947
   *                              Available <i>flags</i> constants
2948
   *                              <tr valign="top">
2949
   *                              <td>Constant Name</td>
2950
   *                              <td>Description</td>
2951
   *                              </tr>
2952
   *                              <tr valign="top">
2953
   *                              <td><b>ENT_COMPAT</b></td>
2954
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2955
   *                              </tr>
2956
   *                              <tr valign="top">
2957
   *                              <td><b>ENT_QUOTES</b></td>
2958
   *                              <td>Will convert both double and single quotes.</td>
2959
   *                              </tr>
2960
   *                              <tr valign="top">
2961
   *                              <td><b>ENT_NOQUOTES</b></td>
2962
   *                              <td>Will leave both double and single quotes unconverted.</td>
2963
   *                              </tr>
2964
   *                              <tr valign="top">
2965
   *                              <td><b>ENT_IGNORE</b></td>
2966
   *                              <td>
2967
   *                              Silently discard invalid code unit sequences instead of returning
2968
   *                              an empty string. Using this flag is discouraged as it
2969
   *                              may have security implications.
2970
   *                              </td>
2971
   *                              </tr>
2972
   *                              <tr valign="top">
2973
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2974
   *                              <td>
2975
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2976
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2977
   *                              </td>
2978
   *                              </tr>
2979
   *                              <tr valign="top">
2980
   *                              <td><b>ENT_DISALLOWED</b></td>
2981
   *                              <td>
2982
   *                              Replace invalid code points for the given document type with a
2983
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2984
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2985
   *                              instance, to ensure the well-formedness of XML documents with
2986
   *                              embedded external content.
2987
   *                              </td>
2988
   *                              </tr>
2989
   *                              <tr valign="top">
2990
   *                              <td><b>ENT_HTML401</b></td>
2991
   *                              <td>
2992
   *                              Handle code as HTML 4.01.
2993
   *                              </td>
2994
   *                              </tr>
2995
   *                              <tr valign="top">
2996
   *                              <td><b>ENT_XML1</b></td>
2997
   *                              <td>
2998
   *                              Handle code as XML 1.
2999
   *                              </td>
3000
   *                              </tr>
3001
   *                              <tr valign="top">
3002
   *                              <td><b>ENT_XHTML</b></td>
3003
   *                              <td>
3004
   *                              Handle code as XHTML.
3005
   *                              </td>
3006
   *                              </tr>
3007
   *                              <tr valign="top">
3008
   *                              <td><b>ENT_HTML5</b></td>
3009
   *                              <td>
3010
   *                              Handle code as HTML 5.
3011
   *                              </td>
3012
   *                              </tr>
3013
   *                              </table>
3014
   *                              </p>
3015
   * @param string $encoding      [optional] <p>
3016
   *                              Like <b>htmlspecialchars</b>,
3017
   *                              <b>htmlentities</b> takes an optional third argument
3018
   *                              <i>encoding</i> which defines encoding used in
3019
   *                              conversion.
3020
   *                              Although this argument is technically optional, you are highly
3021
   *                              encouraged to specify the correct value for your code.
3022
   *                              </p>
3023
   * @param bool   $double_encode [optional] <p>
3024
   *                              When <i>double_encode</i> is turned off PHP will not
3025
   *                              encode existing html entities. The default is to convert everything.
3026
   *                              </p>
3027 2
   *
3028
   *
3029 2
   * @return string the encoded string.
3030
   * </p>
3031 2
   * <p>
3032
   * If the input <i>string</i> contains an invalid code unit
3033 2
   * sequence within the given <i>encoding</i> an empty string
3034
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3035
   * <b>ENT_SUBSTITUTE</b> flags are set.
3036
   */
3037 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the additional numeric encoding below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Every character of 3 or more bytes that is still in the string is
    // replaced by the result of UTF8::html_encode().
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $position);

      // access() may report a failure with false: there is nothing to encode
      // then, and false must not end up as array key or as html_encode() input.
      if ($char === false) {
        continue;
      }

      if (!isset($replacements[$char])) {
        $search[$char] = $char;
        $replacements[$char] = self::html_encode($char);
      }
    }

    return str_replace($search, $replacements, $str);
  }
3065
3066
  /**
3067
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3068
   *
3069
   * INFO: Take a look at "UTF8::htmlentities()"
3070
   *
3071
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3072
   *
3073
   * @param string $str           <p>
3074
   *                              The string being converted.
3075
   *                              </p>
3076
   * @param int    $flags         [optional] <p>
3077
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3078
   *                              invalid code unit sequences and the used document type. The default is
3079
   *                              ENT_COMPAT | ENT_HTML401.
3080
   *                              <table>
3081
   *                              Available <i>flags</i> constants
3082
   *                              <tr valign="top">
3083
   *                              <td>Constant Name</td>
3084
   *                              <td>Description</td>
3085
   *                              </tr>
3086
   *                              <tr valign="top">
3087
   *                              <td><b>ENT_COMPAT</b></td>
3088
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3089
   *                              </tr>
3090
   *                              <tr valign="top">
3091
   *                              <td><b>ENT_QUOTES</b></td>
3092
   *                              <td>Will convert both double and single quotes.</td>
3093
   *                              </tr>
3094
   *                              <tr valign="top">
3095
   *                              <td><b>ENT_NOQUOTES</b></td>
3096
   *                              <td>Will leave both double and single quotes unconverted.</td>
3097
   *                              </tr>
3098
   *                              <tr valign="top">
3099
   *                              <td><b>ENT_IGNORE</b></td>
3100
   *                              <td>
3101
   *                              Silently discard invalid code unit sequences instead of returning
3102
   *                              an empty string. Using this flag is discouraged as it
3103
   *                              may have security implications.
3104
   *                              </td>
3105
   *                              </tr>
3106
   *                              <tr valign="top">
3107
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3108
   *                              <td>
3109
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3110
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
3111
   *                              </td>
3112
   *                              </tr>
3113
   *                              <tr valign="top">
3114
   *                              <td><b>ENT_DISALLOWED</b></td>
3115
   *                              <td>
3116
   *                              Replace invalid code points for the given document type with a
3117
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3118
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3119
   *                              instance, to ensure the well-formedness of XML documents with
3120
   *                              embedded external content.
3121
   *                              </td>
3122
   *                              </tr>
3123
   *                              <tr valign="top">
3124
   *                              <td><b>ENT_HTML401</b></td>
3125
   *                              <td>
3126
   *                              Handle code as HTML 4.01.
3127
   *                              </td>
3128
   *                              </tr>
3129
   *                              <tr valign="top">
3130
   *                              <td><b>ENT_XML1</b></td>
3131
   *                              <td>
3132
   *                              Handle code as XML 1.
3133
   *                              </td>
3134
   *                              </tr>
3135
   *                              <tr valign="top">
3136
   *                              <td><b>ENT_XHTML</b></td>
3137
   *                              <td>
3138
   *                              Handle code as XHTML.
3139
   *                              </td>
3140
   *                              </tr>
3141
   *                              <tr valign="top">
3142
   *                              <td><b>ENT_HTML5</b></td>
3143
   *                              <td>
3144
   *                              Handle code as HTML 5.
3145
   *                              </td>
3146
   *                              </tr>
3147
   *                              </table>
3148
   *                              </p>
3149
   * @param string $encoding      [optional] <p>
3150
   *                              Defines encoding used in conversion.
3151
   *                              </p>
3152
   *                              <p>
3153
   *                              For the purposes of this function, the encodings
3154
   *                              ISO-8859-1, ISO-8859-15,
3155
   *                              UTF-8, cp866,
3156
   *                              cp1251, cp1252, and
3157
   *                              KOI8-R are effectively equivalent, provided the
3158
   *                              <i>string</i> itself is valid for the encoding, as
3159
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3160
   *                              the same positions in all of these encodings.
3161
   *                              </p>
3162
   * @param bool   $double_encode [optional] <p>
3163 1
   *                              When <i>double_encode</i> is turned off PHP will not
3164
   *                              encode existing html entities, the default is to convert everything.
3165 1
   *                              </p>
3166
   *
3167 1
   * @return string The converted string.
3168
   * </p>
3169
   * <p>
3170
   * If the input <i>string</i> contains an invalid code unit
3171
   * sequence within the given <i>encoding</i> an empty string
3172
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3173
   * <b>ENT_SUBSTITUTE</b> flags are set.
3174
   */
3175 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Thin wrapper around PHP's htmlspecialchars(). Only an encoding name
    // other than the exact string "UTF-8" is passed through
    // self::normalize_encoding() before the native call.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
3183
3184
  /**
3185
   * checks whether iconv is available on the server
3186
   *
3187
   * @return   bool True if available, False otherwise
3188
   */
3189
  public static function iconv_loaded()
  {
    // Reports whether the "iconv" extension is loaded in this PHP process.
    $isLoaded = extension_loaded('iconv');

    return $isLoaded === true;
  }
3193 2
3194
  /**
3195 2
   * Converts Integer to hexadecimal U+xxxx code point representation.
3196
   *
3197 2
   * INFO: opposite to UTF8::hex_to_int()
3198
   *
3199
   * @param    int    $int The integer to be converted to hexadecimal code point.
3200
   * @param    string $pfix
3201
   *
3202
   * @return   string The code point, or empty string on failure.
3203
   */
3204
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only values whose string form consists purely of decimal digits are
    // converted; everything else yields an empty string.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits; longer values are
    // returned in full.
    $hexDigits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $hexDigits;
  }
3216
3217
  /**
3218 3
   * checks whether intl-char is available on the server
3219
   *
3220 3
   * @return   bool True if available, False otherwise
3221
   */
3222
  public static function intlChar_loaded()
  {
    // True only when Bootup::is_php('7.0') is strictly true and a class named
    // "IntlChar" exists. "&&" is used instead of the low-precedence "and"
    // operator; the result is the same, but the expression no longer depends
    // on operator-precedence subtleties.
    return Bootup::is_php('7.0') === true && class_exists('IntlChar');
  }
3226
3227
  /**
3228
   * checks whether intl is available on the server
3229
   *
3230
   * @return   bool True if available, False otherwise
3231
   */
3232 2
  public static function intl_loaded()
  {
    // Reports whether the "intl" extension is loaded in this PHP process.
    $isLoaded = extension_loaded('intl');

    return $isLoaded === true;
  }
3236
3237
  /**
3238
   * alias for "UTF8::is_ascii()"
3239
   *
3240
   * @see UTF8::is_ascii()
3241
   *
3242
   * @param string $str
3243
   *
3244
   * @return boolean
3245
   */
3246 1
  public static function isAscii($str)
  {
    // camelCase alias: forwards the argument unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
3250
3251
  /**
3252
   * alias for "UTF8::is_base64()"
3253
   *
3254
   * @see UTF8::is_base64()
3255
   *
3256
   * @param string $str
3257
   *
3258
   * @return bool
3259
   */
3260
  public static function isBase64($str)
  {
    // camelCase alias: forwards the argument unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
3264
3265
  /**
3266
   * alias for "UTF8::is_binary()"
3267
   *
3268
   * @see UTF8::is_binary()
3269
   *
3270
   * @param string $str
3271
   *
3272
   * @return bool
3273
   */
3274
  public static function isBinary($str)
  {
    // camelCase alias: forwards the argument unchanged to is_binary().
    $result = self::is_binary($str);

    return $result;
  }
3278
3279
  /**
3280
   * alias for "UTF8::is_bom()"
3281
   *
3282
   * @see UTF8::is_bom()
3283
   *
3284
   * @param string $utf8_chr
3285
   *
3286
   * @return boolean
3287
   */
3288 1
  public static function isBom($utf8_chr)
  {
    // camelCase alias: forwards the argument unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
3292
3293
  /**
3294
   * alias for "UTF8::is_html()"
3295
   *
3296
   * @see UTF8::is_html()
3297
   *
3298
   * @param string $str
3299
   *
3300
   * @return boolean
3301
   */
3302
  public static function isHtml($str)
  {
    // camelCase alias: forwards the argument unchanged to is_html().
    $result = self::is_html($str);

    return $result;
  }
3306
3307
  /**
3308
   * alias for "UTF8::is_json()"
3309
   *
3310
   * @see UTF8::is_json()
3311
   *
3312
   * @param string $str
3313
   *
3314
   * @return bool
3315
   */
3316 1
  public static function isJson($str)
  {
    // camelCase alias: forwards the argument unchanged to is_json().
    $result = self::is_json($str);

    return $result;
  }
3320
3321
  /**
3322
   * alias for "UTF8::is_utf16()"
3323
   *
3324
   * @see UTF8::is_utf16()
3325
   *
3326
   * @param string $str
3327
   *
3328
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3329
   */
3330 1
  public static function isUtf16($str)
  {
    // camelCase alias: forwards the argument unchanged to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
3334
3335
  /**
3336
   * alias for "UTF8::is_utf32()"
3337
   *
3338
   * @see UTF8::is_utf32()
3339
   *
3340
   * @param string $str
3341
   *
3342
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3343
   */
3344
  public static function isUtf32($str)
  {
    // camelCase alias: forwards the argument unchanged to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
3348
3349
  /**
3350
   * alias for "UTF8::is_utf8()"
3351
   *
3352
   * @see UTF8::is_utf8()
3353
   *
3354
   * @param string $str
3355
   * @param  bool  $strict
3356
   *
3357
   * @return bool
3358 14
   */
3359
  public static function isUtf8($str, $strict = false)
  {
    // camelCase alias: forwards both arguments unchanged to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
3363
3364
  /**
3365
   * Checks if a string is 7 bit ASCII.
3366
   *
3367
   * @param    string $str The string to check.
3368
   *
3369
   * @return   bool <strong>true</strong> if it is ASCII<br />
3370 1
   *                <strong>false</strong> otherwise
3371
   */
3372 1
  public static function is_ascii($str)
  {
    // A single byte in the range 0x80-0xFF disqualifies the string; every
    // other preg_match() outcome counts as "is ASCII".
    $highByteFound = preg_match('/[\x80-\xFF]/', $str);

    return $highByteFound ? false : true;
  }
3376
3377
  /**
3378 1
   * Returns true if the string is base64 encoded, false otherwise.
3379 1
   *
3380
   * @param string $str
3381 1
   *
3382
   * @return bool Whether or not $str is base64 encoded
3383
   */
3384
  public static function is_base64($str)
  {
    $candidate = (string)$str;

    // An empty string is never treated as base64.
    if ($candidate === '') {
      return false;
    }

    // Strict decode followed by re-encode: the input counts as base64 only
    // when this round trip reproduces it exactly.
    $roundTrip = base64_encode(base64_decode($candidate, true));

    return $roundTrip === $candidate;
  }
3398 16
3399
  /**
3400 16
   * Check if the input is binary... (this looks like a hack).
3401 16
   *
3402 15
   * @param mixed $input
3403 16
   *
3404 6
   * @return bool
3405
   */
3406 15
  public static function is_binary($input)
  {
    // Heuristic check; the input is reported as binary when either
    //  - it consists solely of the characters "0" and "1", or
    //  - it contains at least one NUL byte.
    //
    // The previous third condition compared
    // substr_count($input, '^ -~') / strlen($input) against 0.3. It counted
    // the literal four-byte text "^ -~", so the ratio could never exceed
    // 0.25 and the condition was unreachable. It is removed here without
    // changing any result.
    // NOTE(review): if a "share of non-printable bytes" heuristic was
    // intended, it needs a deliberate, separately tested implementation.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    return substr_count($input, "\x00") > 0;
  }
3423
3424
  /**
3425
   * Check if the file is binary.
3426
   *
3427
   * @param string $file
3428
   *
3429
   * @return boolean
3430
   */
3431
  public static function is_binary_file($file)
  {
    // Classifies a file by passing at most its first 512 bytes to
    // is_binary(). A file that cannot be opened or read is treated like
    // empty content.
    $block = '';
    $fp = false;

    try {
      // "b" keeps the read binary-safe regardless of platform.
      $fp = fopen($file, 'rb');

      // fopen() reports failure by returning false rather than throwing, so
      // the handle must be checked before it is handed to fread().
      if ($fp !== false) {
        $data = fread($fp, 512);

        if (is_string($data)) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // Kept from the original: covers setups in which the calls above throw.
      $block = '';
    }

    // Close the handle on every path where one was actually opened.
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
3443
3444
  /**
3445
   * Checks if the given string is equal to any "Byte Order Mark".
3446
   *
3447
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3448
   *
3449
   * @param    string $str The input string.
3450
   *
3451
   * @return   bool True if $str is a Byte Order Mark, False otherwise.
3452
   */
3453
  public static function is_bom($str)
  {
    // True when $str is strictly identical (===) to one of the keys of
    // self::$bom; the values stored under those keys are not consulted.
    $knownBoms = array_keys(self::$bom);

    return in_array($str, $knownBoms, true);
  }
3463
3464
  /**
3465
   * Check if the string contains any html-tags <lall>.
3466 1
   *
3467
   * @param string $str
3468 1
   *
3469
   * @return boolean
3470 1
   */
3471 1
  public static function is_html($str)
  {
    $html = (string)$str;

    // An empty string cannot contain a tag.
    if ($html === '') {
      return false;
    }

    // Looks for one opening, closing or self-closing tag, optionally carrying
    // attributes; a single match is sufficient.
    $found = preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $html);

    return $found === 1;
  }
3490
3491
  /**
3492
   * Try to check if "$str" is a JSON string.
3493 1
   *
3494 1
   * @param string $str
3495 1
   *
3496 1
   * @return bool
3497 1
   */
3498
  public static function is_json($str)
  {
    $json = (string)$str;

    // An empty string is never treated as JSON.
    if ($json === '') {
      return false;
    }

    // Only input that self::json_decode() turns into an object is accepted.
    if (!is_object(self::json_decode($json))) {
      return false;
    }

    // ... and only if that decode finished without a JSON error.
    return json_last_error() === JSON_ERROR_NONE;
  }
3516
3517 4
  /**
3518 4
   * Check if the string is UTF-16.
3519 4
   *
3520 4
   * @param string $str
3521 4
   *
3522 4
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3523 4
   */
3524 4 View Code Duplication
  public static function is_utf16($str)
  {
    // Heuristic UTF-16 detection: returns 1 (UTF-16LE), 2 (UTF-16BE) or
    // false. Both byte orders are scored and the higher score wins; equal
    // scores mean "not detected".
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined at all.
    if (!self::is_binary($str)) {
      return false;
    }

    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array_keys($scores) as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // The decoded text has to survive candidate -> UTF-8 -> candidate -> UTF-8.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // NOTE(review): in_array() looks the keys of count_chars($roundTrip) up
      // among the *values* of count_chars($str) -- confirm this is intended.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
3572 3
3573
  /**
3574 3
   * Check if the string is UTF-32.
3575 3
   *
3576 3
   * @param string $str
3577 2
   *
3578 2
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3579 2
   */
3580 2 View Code Duplication
  public static function is_utf32($str)
  {
    // Heuristic UTF-32 detection: returns 1 (UTF-32LE), 2 (UTF-32BE) or
    // false. Both byte orders are scored and the higher score wins; equal
    // scores mean "not detected".
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined at all.
    if (!self::is_binary($str)) {
      return false;
    }

    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array_keys($scores) as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // The decoded text has to survive candidate -> UTF-8 -> candidate -> UTF-8.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // NOTE(review): in_array() looks the keys of count_chars($roundTrip) up
      // among the *values* of count_chars($str) -- confirm this is intended.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
3628
3629 43
  /**
3630
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3631 43
   *
3632 3
   * @see    http://hsivonen.iki.fi/php-utf8/
3633
   *
3634
   * @param  string $str    The string to be checked.
3635 41
   * @param  bool   $strict Check also if the string is not UTF-16 or UTF-32.
3636 1
   *
3637 1
   * @return bool
3638
   */
3639
  public static function is_utf8($str, $strict = false)
  {
    // Validates that $str consists only of well-formed UTF-8 sequences.
    // With $strict === true, input detected as UTF-16 or UTF-32 is rejected
    // before the byte-level check runs.
    $str = (string)$str;

    // The empty string is considered valid.
    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): the "/u" regex shortcut is taken when pcre_utf8_support()
    // is NOT true, which looks inverted -- confirm the intent. The condition
    // is kept exactly as it was.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched when the /u modifier
      // is used, then it's valid UTF-8. If the UTF-8 is somehow invalid,
      // nothing at all will match, even if the string contains some valid
      // sequences.
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Expecting either a US-ASCII octet or the first octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of a 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of a 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of a 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of a 5 octet sequence. This is always illegal, but
          // instead of resynchronizing, the sequence is consumed and then
          // rejected by the "4 < $mBytes" check further down.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of a 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence: only a continuation octet
      // (binary 10xxxxxx) is acceptable here.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the sequence: $mUcs4 now holds the complete code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A string that ends while continuation octets are still outstanding
    // (e.g. a lone "\xC3") is an incomplete sequence and therefore invalid.
    // Previously this case fell through and was reported as valid.
    return $mState === 0;
  }
3774
  /**
3775
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3776
   * Decodes a JSON string
3777
   *
3778
   * @link http://php.net/manual/en/function.json-decode.php
3779
   *
3780
   * @param string $json    <p>
3781
   *                        The <i>json</i> string being decoded.
3782
   *                        </p>
3783
   *                        <p>
3784
   *                        This function only works with UTF-8 encoded strings.
3785
   *                        </p>
3786
   *                        <p>PHP implements a superset of
3787
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3788
   *                        only supports these values when they are nested inside an array or an object.
3789
   *                        </p>
3790
   * @param bool   $assoc   [optional] <p>
3791
   *                        When <b>TRUE</b>, returned objects will be converted into
3792
   *                        associative arrays.
3793
   *                        </p>
3794
   * @param int    $depth   [optional] <p>
3795
   *                        User specified recursion depth.
3796
   *                        </p>
3797
   * @param int    $options [optional] <p>
3798 2
   *                        Bitmask of JSON decode options. Currently only
3799
   *                        <b>JSON_BIGINT_AS_STRING</b>
3800 2
   *                        is supported (default is to cast large integers as floats)
3801
   *                        </p>
3802 2
   *
3803
   * @return mixed the value encoded in <i>json</i> in appropriate
3804
   * PHP type. Values true, false and
3805 2
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3806
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3807
   * <i>json</i> cannot be decoded or if the encoded
3808 2
   * data is deeper than the recursion limit.
3809
   */
3810
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through self::filter() before it is decoded.
    $filtered = self::filter($json);

    // $options is only forwarded when Bootup::is_php('5.4') is strictly true;
    // otherwise the three-argument form of json_decode() is used.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3823
  /**
   * Returns the JSON representation of a value, after passing the value through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The value to encode. It is run through UTF8::filter() first.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of JSON_* encode options, handed unchanged to the native json_encode().
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Maximum nesting depth. Only forwarded when Bootup::is_php('5.5') reports support.
   *                       </p>
   *
   * @return string|false a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // Older runtimes: call the native function without the "$depth" argument.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3871
3872
  /**
   * Makes string's first char lowercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function lcfirst($str)
  {
    // UTF8::substr() can hand back false (e.g. for an empty input), so both
    // parts are cast explicitly instead of relying on implicit conversion.
    $firstChar = (string)self::substr($str, 0, 1);
    $remainder = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $remainder;
  }
3883
3884 24
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * When no character list is given, all leading characters of the Unicode
   * categories "Z" (separators) and "C" (control / other) are removed.
   *
   * @param  string $str   The string to be trimmed
   * @param  string $chars Optional characters to be stripped
   *
   * @return string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP but is a valid character to strip,
    // so it must not be mistaken for "no character list given".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    $chars = self::rxClass($chars);

    return preg_replace("/^{$chars}+/u", '', $str);
  }
3909
3910
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point, or an empty string
   *                  when the input contains no code points.
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codepoints = self::codepoints($arg);

    // The native max() must not be called with an empty array (warning / error).
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3925 1
3926 1
  /**
   * Determines the largest byte width used by any single UTF-8 encoded
   * character of the given string.
   *
   * @param  string $str The original Unicode string.
   *
   * @return int The biggest per-character byte count, or 0 when there are no characters.
   */
  public static function max_chr_width($str)
  {
    $byteWidths = self::chr_size_list($str);

    return count($byteWidths) > 0 ? (int)max($byteWidths) : 0;
  }
3943 2
3944
  /**
   * Checks whether the "mbstring" extension is loaded; if it is, its internal
   * encoding is switched to UTF-8 as a side effect.
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3959
3960
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param  mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return string The character with the lowest code point, or an empty string
   *                when the input contains no code points.
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode($arg);
    }

    $codepoints = self::codepoints($arg);

    // The native min() must not be called with an empty array (warning / error).
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3975 125
3976
  /**
   * Alias of "UTF8::normalize_encoding()"; the argument and the result are
   * passed through unchanged.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   *
   * @return string
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3989 125
3990 1
  /**
   * Normalize the encoding-"name" input.
   *
   * "UTF-8" and any name listed in self::$iconvEncoding are returned untouched.
   * Every other name is upper-cased and, if its alphanumeric form is a known
   * alias (e.g. "utf8", "latin1"), replaced by the canonical name. Results are
   * memoized per input value.
   *
   * @param  string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string|false e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.;
   *                      false when the given encoding name is empty.
   */
  public static function normalize_encoding($encoding)
  {
    static $staticNormalizeEncodingCache = array();

    // Constant alias table, built once instead of on every call.
    static $equivalences = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (!$encoding) {
      return false;
    }

    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    if (in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($staticNormalizeEncodingCache[$encoding])) {
      return $staticNormalizeEncodingCache[$encoding];
    }

    $encodingOrig = $encoding;
    $encoding = strtoupper($encoding);

    // lookup key: the upper-cased name without punctuation ("UTF-8" -> "UTF8")
    $encodingUpperHelper = preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    $staticNormalizeEncodingCache[$encodingOrig] = $encoding;

    return $encoding;
  }
4046
4047 2
  /**
   * Replaces every key of the self::$utf8MSWord table that occurs in the
   * string with the value mapped to it.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists are derived from the table once and then reused
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replace, $str);
  }
4066 7
4067 7
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable found in the string is replaced by a
   * plain space; unless requested otherwise, the entries of
   * self::$bidiUniCodeControlsTable are removed first.
   *
   * @param bool|mixed $keepNonBreakingSpace    Set to true (strict boolean), to keep non-breaking-spaces.
   * @param string     $str                     The string to be normalized.
   * @param bool       $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // The cache key must be derived from the same strict check that decides
    // whether NO-BREAK SPACE is dropped from the table. Otherwise a truthy
    // non-boolean (e.g. 1) would store the unfiltered table under the key that
    // later calls with "true" read from.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($whitespaces[$cacheKey])) {

      $whitespaces[$cacheKey] = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($whitespaces[$cacheKey]['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($whitespaces[$cacheKey]);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
4105
4106
  /**
   * Format a number with grouped thousands.
   *
   * Separators longer than one byte (e.g. multi-byte UTF-8 characters) are
   * supported: the number is formatted with the ASCII separators "." and ","
   * and both are then swapped for the requested ones in a single pass.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @deprecated
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // The workaround is needed as soon as *either* separator is longer than one
    // byte, because old native implementations only honour the first byte.
    // strtr() replaces both placeholders simultaneously, so a "," or "." inside
    // one of the requested separators is never replaced a second time.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
4143
4144 16
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * If "IntlChar" support was detected, \IntlChar::ord() is tried first; a
   * falsy result falls through to a manual decoder that looks at (at most) the
   * first four bytes. The manual decoder does not validate continuation bytes.
   *
   * @param  string $chr The character of which to calculate code point.
   *
   * @return int Unicode code point of the given character,<br />
   *         0 for an empty input.
   */
  public static function ord($chr)
  {
    // empty input ("0" is a real character and must pass)
    if (!$chr && $chr !== '0') {
      return 0;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $codePoint = \IntlChar::ord($chr);
      if ($codePoint) {
        return $codePoint;
      }
    }

    // 1-based list of the byte values of the first (up to) four bytes
    $bytes = unpack('C*', substr($chr, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // four-byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three-byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two-byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte
    return $lead;
  }
4188
4189
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never creates variables in the current scope;
   *          the result is only written to the reference parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string; it is passed through UTF8::clean() before parsing.
   *                        </p>
   * @param array  $result  <p>
   *                        The result will be returned into this reference parameter.
   *                        </p>
   *
   * @return bool false if the string could not be parsed or the result is empty, true otherwise
   */
  public static function parse_str($str, &$result)
  {
    $cleaned = self::clean($str);

    $parsed = \mb_parse_str($cleaned, $result);

    return $parsed !== false && !empty($result);
  }
4218
4219
  /**
   * Checks if the "/u" pattern modifier, which enables UTF-8 support in PCRE, is usable.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier; warnings are silenced on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
4229 1
4230 1
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: digit-only values are used as
   * decimal numbers, other hexadecimal-looking values go through
   * UTF8::hex_to_int(), everything else through UTF8::ord(). An empty array is
   * returned as soon as a boundary is empty or resolves to a falsy code point.
   *
   * @param  mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param  mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return array
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve "start" first, then "end" - same rules for both
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($bounds[0], $bounds[1]));
  }
4276 10
4277 5
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$bom (BOM byte string => its byte length) that the
   * string starts with is cut off.
   *
   * @param string $str
   *
   * @return string
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // Older PHP versions return false from substr() when nothing is left
        // after the BOM; the cast keeps the return type a string.
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
4294
4295 5
  /**
   * Alias of "UTF8::remove_bom()"; the argument and the result are passed
   * through unchanged.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4308 1
4309 1
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of a needle is collapsed into a
   * single occurrence. A value for $what that is neither a string nor an array
   * leaves the input untouched.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        // an empty needle can never have duplicates
        if ($item === '') {
          continue;
        }

        // The replacement is the captured group, not the raw needle: the needle
        // itself may contain "\0" / "$1"-like sequences, which preg_replace()
        // would interpret as back-references.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4331
4332
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the URL-encoded forms ("%00", "%0B", ...) of the characters.
   * @param  string $replacement
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // Every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09).
    if ($url_encoded) {
      // Hex digits in percent-encoding may be upper- or lower-case ("%0b" == "%0B"),
      // therefore both patterns are case-insensitive.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing is replaced any more: removing one sequence can
    // bring the pieces of another one together (e.g. "%%0b0b").
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4365 42
4366
  /**
   * Replace the diamond question mark (�) with the replacement.
   *
   * Both the escaped byte sequence "\xEF\xBF\xBD" and the literal character
   * are searched for.
   *
   * @param string $str
   * @param string $unknown The text put in place of each occurrence.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    $search = array("\xEF\xBF\xBD", '�');
    $replace = array($unknown, $unknown);

    return str_replace($search, $replace, $str);
  }
4388 23
4389
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * When no character list is given, all trailing characters of the Unicode
   * categories "Z" (separators) and "C" (control / other) are removed.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP but is a valid character to strip,
    // so it must not be mistaken for "no character list given".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    $chars = self::rxClass($chars);

    return preg_replace("/{$chars}+$/u", '', $str);
  }
4414 45
4415
  /**
   * Builds a regular-expression fragment from the characters of a string.
   *
   * The elements returned by UTF8::str_split() are handled as follows:
   * "-" is put at the front of the bracket expression, elements shorter than
   * three bytes are appended after preg_quote(), longer elements that are a
   * single character are appended as they are, and anything else becomes an
   * alternative of its own. The result is memoized per "$s . $class".
   *
   * @param string $s     The characters to build the fragment from.
   * @param string $class Initial content of the bracket expression.
   *
   * @return string Either a bracket expression or a non-capturing group of alternatives.
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // index 0 collects the bracket expression, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        $parts[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $pattern;

    return $pattern;
  }
4462
4463
  /**
   * WARNING: Prints every entry of self::$support directly to the output,
   * each followed by "\n<br>" - e.g. for debugging.
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportEntry) {
      echo $supportEntry, "\n<br>";
    }
  }
4472 1
4473 1
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $char           The Unicode character to be encoded as numbered entity.
   * @param    bool   $keepAsciiChars Keep ASCII chars (they are returned unchanged).
   *
   * @return   string The HTML numbered entity, or an empty string for an empty input.
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false)
  {
    // empty input - the character "0" is falsy in PHP, but must still be encoded
    if (!$char && $char !== '0') {
      return '';
    }

    if (
        $keepAsciiChars === true
        &&
        self::isAscii($char) === true
    ) {
      return $char;
    }

    return '&#' . self::ord($char) . ';';
  }
4497
4498 36
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split by a regular expression;
   * otherwise a byte-wise fallback collects the well-formed 1- to 4-byte
   * sequences and leaves out everything else.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied on the PCRE path).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and group them by the lead-byte pattern

      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // single byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // two-byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos += 1;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // three-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // four-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    // glue the single characters together into chunks of "$length" characters
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4580
4581
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (only for input that
   * UTF8::is_binary() accepts), ASCII, UTF-8, "\mb_detect_encoding()" with a
   * fixed candidate list and finally an "iconv()" round trip for every entry
   * of self::$iconvEncoding.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $candidates = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $detected = \mb_detect_encoding($str, $candidates, true);
    if ($detected) {
      return $detected;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted when converting the string from that encoding
    // to itself leaves it unchanged (compared via md5 hash).

    $originalHash = md5($str);
    foreach (self::$iconvEncoding as $candidate) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @iconv($candidate, $candidate, $str);

      if (md5($roundTrip) === $originalHash) {
        return $candidate;
      }
    }

    return false;
  }
4672 13
4673
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every needle is quoted and turned into a case-insensitive unicode pattern, the
   * replacement is always inserted literally ("$1" or "\1" are NOT treated as
   * back-references).
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The needle or an array of needles. Every replacement with a search
   *                       array is performed on the result of the previous replacement.
   *                       An empty needle never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement or an array of replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one regex per needle
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle .= '';

      if ($needle === '') {
        // this pattern can never match, so an empty needle replaces nothing
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // escape "\" and "$", otherwise "preg_replace()" would read them as back-references
    $escapeMap = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      $replacements = array();
      foreach ($replace as $replacement) {
        $replacements[] = strtr((string)$replacement, $escapeMap);
      }
    } else {
      $replacements = strtr((string)$replace, $escapeMap);
    }

    $replaced = 0;
    $subject = preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
4716 1
4717 1
  /**
   * Limit the number of characters in a string, but cut at a word boundary.
   *
   * A string that is not longer than $length is returned unchanged. Otherwise the
   * string is cut to $length characters, the last (possibly incomplete) word is
   * dropped and $strAddOn is appended.
   *
   * @param  string $str
   * @param  int    $length
   * @param  string $strAddOn
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the cut would end on a space -> drop this space and we are done
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // "substr()" can return false -> normalize to a string before it is re-used
    $str = (string)self::substr($str, 0, $length);

    // remove the last word, it may be cut in the middle
    $words = explode(' ', $str);
    array_pop($words);
    $new_str = implode(' ', $words);

    if ($new_str === '') {
      // there was only one word -> hard cut
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    return $new_str . $strAddOn;
  }
4757 2
4758 2
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if $pad_length is not an integer, is not
   * positive, is smaller than the length of $str or if $pad_string is empty.
   *
   * @param    string $str        The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with -> this also avoids the division by zero below
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // with an odd difference the right side gets the extra character
        $leftLength = (int)floor($diff / 2);
        $rightLength = $diff - $leftLength;

        $pre = str_repeat($pad_string, (int)ceil($leftLength / $ps_length));
        $pre = (string)self::substr($pre, 0, $leftLength);
        $post = str_repeat($pad_string, (int)ceil($rightLength / $ps_length));
        $post = (string)self::substr($post, 0, $rightLength);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4803
4804
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() first, the result is handed
   * over to the native "str_repeat()".
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated,
   *                           it is passed unchanged to the native function.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4828
4829
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: This is only a wrapper for the native "str_replace()", all arguments
   *       are forwarded unchanged.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The needle, or an array of needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement, or an array of replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The haystack, a string or an array of strings. For an array
   *                       the return value is an array as well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4862
4863 1
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via UTF8::split(), the resulting pieces are shuffled
   * and glued together again.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4878
4879
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    // convert the sorted code points back into a string
    return self::string($codePoints);
  }
4904 1
4905
  /**
   * Split a string into an array.
   *
   * The string is split with the grapheme-cluster regex of the Grapheme-polyfill,
   * $len of these pieces are joined into one array element.
   *
   * @param string $str
   * @param int    $len Number of pieces per element. Values below 1 are delegated
   *                    to the native "str_split()".
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $len = (int)$len;

    if ($len < 1) {
      // let the native function deal with the invalid length
      return str_split($str, $len);
    }

    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    $chunks = array();
    $current = -1;

    foreach ($graphemes as $index => $grapheme) {
      if ($index % $len === 0) {
        // every $len-th piece opens a new chunk
        $chunks[++$current] = $grapheme;
      } else {
        $chunks[$current] .= $grapheme;
      }
    }

    return $chunks;
  }
4943
4944 7
  /**
   * Get a binary representation of a specific string.
   *
   * All bytes of the string are read as one big number and this number is written
   * with the digits "0" and "1", without leading zeros. The empty string gives "0".
   *
   * INFO: the conversion is done byte by byte, so it is exact for strings of any
   *       length ("base_convert()" loses precision for large numbers).
   *
   * @param  string $str The input string.
   *
   * @return string
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $bits = '';
    $byteCount = strlen($str); // native byte-length
    for ($i = 0; $i < $byteCount; $i++) {
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    // a number has no leading zeros
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4959
4960 1
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown Forwarded unchanged as second argument.
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    $ascii = self::to_ascii($str, $unknown);

    return $ascii;
  }
4974 1
4975
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split with a regex that captures the words, so the words are
   * found at the odd positions of the split result.
   *
   * @param string $str      The input string.
   * @param int    $format   <strong>0</strong> => return a number of words<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist Additional chars that belong to words and do not start a new word
   *
   * @return array|int The number of words in the string
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $partCount = count($strParts);

    // every other format than 1 or 2 returns the number of words
    if ($format !== 1 && $format !== 2) {
      return ($partCount - 1) / 2;
    }

    $words = array();

    if ($format === 1) {
      for ($pos = 1; $pos < $partCount; $pos += 2) {
        $words[] = $strParts[$pos];
      }

      return $words;
    }

    // format 2: the key is the offset of the word in the string
    $offset = self::strlen($strParts[0]);
    for ($pos = 1; $pos < $partCount; $pos += 2) {
      $words[$offset] = $strParts[$pos];
      $offset += self::strlen($strParts[$pos]) + self::strlen($strParts[$pos + 1]);
    }

    return $words;
  }
5017
5018
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(), both strings are passed
   *       through UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5034
5035
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings are equal, everything else is compared with the native
   * "strcmp()" after both strings were normalized to NFD.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    // shortcut: no normalization is needed for identical strings
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5052
5053 7
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str
   * @param string $charList The characters that end the segment.
   * @param int    $offset   Start of the part of the string that is checked.
   * @param int    $length   Length of the part of the string that is checked.
   *
   * @return int|null Number of characters in front of the first character from $charList,
   *                  the length of the checked string if no such character was found,
   *                  or null if $charList is empty.
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = $charList . '';
    if ($charList === '') {
      return null;
    }

    // the string is only cut if offset or length differ from the defaults
    $useWholeString = !$offset && 2147483647 === $length;
    if ($useWholeString) {
      $str = (string)$str;
    } else {
      $str = (string)self::substr($str, $offset, $length);
    }

    // group 1 is the (shortest) part in front of the first char from the list
    if (!preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $match)) {
      return self::strlen($str);
    }

    /** @noinspection OffsetOperationsInspection */
    return self::strlen($match[1]);
  }
5082
5083
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints(), every element is converted
   *       with UTF8::chr() and the results are concatenated.
   *
   * @param  array $array Integer or Hexadecimal codepoints
   *
   * @return string UTF-8 encoded string
   */
  public static function string(array $array)
  {
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
5104 2
5105 2
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
5118
5119
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The string is compared with every key of self::$bom.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5136
5137 2
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * The string is passed through UTF8::clean() before the native "strip_tags()" is called.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags which should not be stripped, forwarded unchanged
   *                               to the native function.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
5163
5164 8
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack to start searching,
   *                           null is handled like 0.
   *                           </p>
   * @param string  $encoding  Everything else than "UTF-8" is passed through
   *                           UTF8::normalize_encoding().
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found or if haystack or needle is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // "\mb_stripos()" expects an integer, so the default (null) must not be passed as-is
    return \mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
5209
5210
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * The search is case-insensitive and done by "\mb_stristr()" with the encoding "UTF-8".
   *
   * @param string $str
   * @param string $needle
   * @param bool   $before_needle Forwarded unchanged to "\mb_stristr()".
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found or empty.
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = $needle . '';

    if ($needle === '') {
      return false;
    }

    return \mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
5227
5228
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Set the charset for e.g. "\mb_" function
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only used for "UTF-8")
   *
   * @return int the number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    // INFO: this is only a fallback for old versions
    $isUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $isUtf8 ? 'UTF-8' : self::normalize_encoding($encoding);

    // for these encodings the native byte-length is returned
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5267
5268
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp(), both strings are passed
   *       through UTF8::strtocasefold() and compared with UTF8::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5284
5285
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp(). Identical strings are equal,
   *       everything else is passed through UTF8::strtonatfold() and compared
   *       with the native "strnatcmp()".
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5307
5308
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared
   * with UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     The length of strings to be used in the comparison.
   *                     </p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5331
5332
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut with UTF8::substr() and then compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     Number of characters to use in the comparison.
   *                     </p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // "substr()" can return false -> always hand over real strings to "strcmp()"
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5358 1
5359
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>
   *                          The string in which the characters of char_list are looked for.
   *                          </p>
   * @param string $char_list <p>
   *                          The characters to search for. This parameter is case sensitive.
   *                          </p>
   *
   * @return string|false The part of haystack starting at the first character found,<br />
   *                      or false if nothing is found or one of the arguments is empty.
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $found = array();
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $found)) {
      return false;
    }

    // byte offset of the matched character; the byte-based substr() returns the rest unchanged
    $bytePos = strpos($haystack, $found[0]);

    return substr($haystack, $bytePos);
  }
5388
5389
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack     <p>
   *                                 The string being checked.
   *                                 </p>
   * @param string|int $needle       <p>
   *                                 The string to find in haystack.
   *                                 Or a code point as int.
   *                                 </p>
   * @param int        $offset       [optional] <p>
   *                                 The search offset. If it is not specified, 0 is used.
   *                                 </p>
   * @param string     $encoding     [optional] <p>
   *                                 Character encoding; it is only handed over to mb_strpos().
   *                                 </p>
   * @param boolean    $cleanUtf8    Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support integer $needle: convert a code point into
    // its character BEFORE the string cast, otherwise the integer check can never match
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalize_encoding($encoding);
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): the 'iconv' support flag guards a grapheme_*() call - confirm this is intended
    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);

      // self::substr() returns false if the offset lies behind the end of the string
      if ($haystack === false) {
        return false;
      }
    }

    if (($pos = \strpos($haystack, $needle)) !== false) {
      // translate the byte position into a character position
      $left = \substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5470
5471
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use, "UTF-8" by default.
   *                         Any other value is run through self::normalize_encoding() first.
   *                         </p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return \mb_strrchr($haystack, $needle, $part, $charset);
  }
5506
5507
  /**
   * Alias of "UTF8::strstr()": all arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle);

    return $result;
  }
5522
5523
  /**
   * Alias of "UTF8::stristr()": all arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string $haystack
   * @param string $needle
   * @param bool   $before_needle
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle);

    return $result;
  }
5538
5539
  /**
   * Reverses characters order in the string.
   *
   * @param  string $str The input string
   *
   * @return string The string with characters in the reverse sequence,
   *                or an empty string for empty input
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // split into single characters, flip the list and glue it together again
    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
5556
5557
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use, "UTF-8" by default.
   *                         Any other value is run through self::normalize_encoding() first.
   *                         </p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return \mb_strrichr($haystack, $needle, $part, $charset);
  }
5592 10
5593
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Both strings are lower-cased via self::strtolower() and then searched with self::strrpos().
   *
   * @param string  $haystack  The string to look in
   * @param string  $needle    The string to look for
   * @param int     $offset    (Optional) Number of characters to ignore in the beginning or end
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found, it returns false.
   */
  public static function strripos($haystack, $needle, $offset = 0, $cleanUtf8 = false)
  {
    $haystackLower = self::strtolower($haystack);
    $needleLower = self::strtolower($needle);

    return self::strrpos($haystackLower, $needleLower, $offset, $cleanUtf8);
  }
5608
5609
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked, for the last occurrence of needle.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack.
   *                              Or a code point as int.
   *                              </p>
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters into
   *                              the string. Negative values will stop searching at an arbitrary point
   *                              prior to the end of the string.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found, it returns false.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // a non-negative integer needle is a code point and gets converted into its character
    if ($needle === ((int)$needle) && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): the 'iconv' support flag guards a grapheme_*() call - confirm this is intended
    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: cut the haystack according to the offset, then search byte-wise

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // count the characters in front of the match to get a character position
    $before = substr($haystack, 0, $bytePos);

    return ($offset > 0 ? $offset : 0) + self::strlen($before);
  }
5685 11
5686 11
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    The string to examine
   * @param string $mask   The allowed characters
   * @param int    $offset Start position inside $str
   * @param int    $length Length of the part of $str to examine
   *
   * @return int Number of leading characters that are part of the mask, 0 if there are none
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // only cut the subject if the caller actually narrowed the range
    $narrowed = $offset || 2147483647 !== $length;
    if ($narrowed) {
      $str = self::substr($str, $offset, $length);
    }

    $matches = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5705 11
5706
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Thin wrapper around grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      <p>
   *                              The input string. Must be valid UTF-8.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to look for. Must be valid UTF-8.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack before the first
   *                              occurrence of the needle (excluding the needle) is returned.
   *                              </p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    $part = \grapheme_strstr($haystack, $needle, $before_needle);

    return $part;
  }
5728 6
5729
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str
   * @param bool   $full <b>true</b> === replace full case folding chars + strtolower,<br />
   *                     <b>false</b> use only $commonCaseFold +  strtolower
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup lists are built on first use and kept in static variables
    static $commonKeys = null;
    static $commonValues = null;
    static $fullTable = null;

    if (null === $commonKeys) {
      $commonKeys = array_keys(self::$commonCaseFold);
      $commonValues = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonKeys, $commonValues, $str);

    if ($full) {

      if (null === $fullTable) {
        $fullTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    // clean the folded string first, lower-case it afterwards
    return self::strtolower(self::clean($folded));
  }
5767 4
5768
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      <p>The string being lowercased.</p>
   * @param string $encoding [optional] <p>"UTF-8" by default; any other value is run through
   *                         self::normalize_encoding() first.</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return \mb_strtolower($str, $charset);
  }
5793
5794
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed.
   *
   * @param string $s
   *
   * @return string
   */
  private static function strtonatfold($s)
  {
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5805
5806
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         "UTF-8" by default; it is only used by the mbstring code path.
   *                         </p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // lookup lists for the fallback, built on first use
    static $upperKeys = null;
    static $upperValues = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === true) {
      $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

      return \mb_strtoupper($str, $charset);
    }

    // fallback: replace the characters via the case table

    if ($upperKeys === null) {
      $table = self::case_table();
      $upperKeys = array_keys($table);
      $upperValues = array_values($table);
    }

    return str_replace($upperKeys, $upperValues, self::clean($str));
  }
5855
5856
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  <p>
   *                           The string being translated.
   *                           </p>
   * @param string|array $from <p>
   *                           The characters to replace. If $to is omitted, the value is
   *                           handed over unchanged to the two-argument form of PHP's strtr().
   *                           </p>
   * @param string|array $to   <p>
   *                           The replacement characters.
   *                           </p>
   *
   * @return string This function returns a copy of str,
   * translating all occurrences of each character in
   * from to the corresponding character in
   * to.
   */
  public static function strtr($str, $from, $to = INF)
  {
    // INF is the marker for "no $to given"
    if (INF === $to) {
      return strtr($str, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);

    $fromCount = count($fromChars);
    $toCount = count($toChars);

    // array_combine() needs lists of equal size, so the longer list is shortened
    if ($fromCount > $toCount) {
      $fromChars = array_slice($fromChars, 0, $toCount);
    } elseif ($fromCount < $toCount) {
      $toChars = array_slice($toChars, 0, $fromCount);
    }

    $pairs = array_combine($fromChars, $toChars);

    return strtr($str, $pairs);
  }
5895
5896
  /**
   * Return the width of a string.
   *
   * Thin wrapper around mb_strwidth() with the encoding fixed to "UTF-8".
   *
   * @param string $s
   *
   * @return int
   */
  public static function strwidth($s)
  {
    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5907 1
5908 1
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>
   *                           The string being checked.
   *                           </p>
   * @param int     $start     <p>
   *                           The first position used in str.
   *                           </p>
   * @param int     $length    [optional] <p>
   *                           The maximum length of the returned string.
   *                           </p>
   * @param string  $encoding  [optional] <p>
   *                           "UTF-8" by default; it is only used by the mbstring code path.
   *                           </p>
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string|false Returns a sub-string specified by the start and length parameters,<br />
   *                      an empty string for empty input,<br />
   *                      or false if start is greater than the length of the string.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr

      $str = self::clean($str);
    }

    // the character count is only needed for a non-zero start or a missing length
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      $isUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
      $charset = $isUtf8 ? 'UTF-8' : self::normalize_encoding($encoding);

      return \mb_substr($str, $start, $length, $charset);
    }

    // NOTE(review): the 'iconv' support flag guards a grapheme_*() call - confirm this is intended
    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    return implode(array_slice($chars, $start, $length));
  }
5986
5987
  /**
5988 1
   * Binary safe comparison of two strings from an offset, up to length characters.
5989 1
   *
5990
   * @param string  $main_str           The main string being compared.
5991 1
   * @param string  $str                The secondary string being compared.
5992
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
5993 1
   *                                    end of the string.
5994
   * @param int     $length             The length of the comparison. The default value is the largest of the length of
5995
   *                                    the str compared to the length of main_str less the offset.
5996
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
5997
   *
5998
   * @return int
5999
   */
6000
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
6001
  {
6002
    $main_str = self::substr($main_str, $offset, $length);
6003
    $str = self::substr($str, 0, self::strlen($main_str));
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 6002 can also be of type false; however, voku\helper\UTF8::strlen() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
6004
6005
    return $case_insensitivity === true ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 6002 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 6003 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 6002 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 6003 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
6006
  }
6007
6008 6
  /**
6009
   * Count the number of substring occurrences
6010 6
   *
6011 1
   * @link  http://php.net/manual/en/function.substr-count.php
6012
   *
6013
   * @param string $haystack <p>
6014 1
   *                         The string to search in
6015 1
   *                         </p>
6016 1
   * @param string $needle   <p>
6017 1
   *                         The substring to search for
6018
   *                         </p>
6019
   * @param int    $offset   [optional] <p>
6020
   *                         The offset where to start counting
6021 1
   *                         </p>
6022 1
   * @param int    $length   [optional] <p>
6023 1
   *                         The maximum length after the specified offset to search for the
6024 1
   *                         substring. It outputs a warning if the offset plus the length is
6025 1
   *                         greater than the haystack length.
6026 1
   *                         </p>
6027 1
   *
6028 1
   * @return int|false The number of occurrences of $needle, or false if either string is empty or offset + length is not positive.
6029
   */
6030
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
6031
  {
6032 1
    $haystack = (string)$haystack;
6033 1
    $needle = (string)$needle;
6034 1
6035 1
    if (!isset($haystack[0], $needle[0])) {
6036 1
      return false;
6037 1
    }
6038 1
6039 1
    if ($offset || $length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to true; this is ambiguous if the integer can be zero. You might want to explicitly use !== null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
6040
      $offset = (int)$offset;
6041
      $length = (int)$length;
6042 1
6043 1
      if ($length + $offset <= 0) {
6044 1
        return false;
6045 1
      }
6046
6047
      $haystack = self::substr($haystack, $offset, $length);
6048
    }
6049 1
6050
    return \mb_substr_count($haystack, $needle);
6051 6
  }
6052 1
6053 1
  /**
6054 1
   * Replace text within a portion of a string.
6055 1
   *
6056
   * source: https://gist.github.com/stemar/8287074
6057 1
   *
6058
   * @param string|array   $str
6059
   * @param string|array   $replacement
6060 6
   * @param int|array      $start
6061 6
   * @param null|int|array $length
6062
   *
6063 6
   * @return array|string
6064 4
   */
6065
  public static function substr_replace($str, $replacement, $start, $length = null)
6066 4
  {
6067 4
    if (is_array($str)) {
6068
      $num = count($str);
6069 6
6070
      // $replacement
6071 6
      if (is_array($replacement)) {
6072
        $replacement = array_slice($replacement, 0, $num);
6073
      } else {
6074
        $replacement = array_pad(array($replacement), $num, $replacement);
6075
      }
6076
6077
      // $start
6078 View Code Duplication
      if (is_array($start)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6079
        $start = array_slice($start, 0, $num);
6080
        foreach ($start as &$valueTmp) {
6081
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
6082 1
        }
6083
        unset($valueTmp);
6084 1
      } else {
6085
        $start = array_pad(array($start), $num, $start);
6086 1
      }
6087 1
6088
      // $length
6089
      if (!isset($length)) {
6090 1
        $length = array_fill(0, $num, 0);
6091 1 View Code Duplication
      } elseif (is_array($length)) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6092
        $length = array_slice($length, 0, $num);
6093 1
        foreach ($length as &$valueTmpV2) {
6094 1
          if (isset($valueTmpV2)) {
6095
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
6096 1
          } else {
6097
            $valueTmpV2 = 0;
6098 1
          }
6099 1
        }
6100
        unset($valueTmpV2);
6101 1
      } else {
6102
        $length = array_pad(array($length), $num, $length);
6103 1
      }
6104
6105 1
      // Recursive call
6106
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
6107 1
    } else {
6108
      if (is_array($replacement)) {
6109
        if (count($replacement) > 0) {
6110
          $replacement = $replacement[0];
6111
        } else {
6112
          $replacement = '';
6113
        }
6114
      }
6115
    }
6116
6117
    preg_match_all('/./us', (string)$str, $smatches);
6118
    preg_match_all('/./us', (string)$replacement, $rmatches);
6119
6120 6
    if ($length === null) {
6121
      $length = \mb_strlen($str);
6122 6
    }
6123
6124
    array_splice($smatches[0], $start, $length, $rmatches[0]);
6125
6126
    return implode($smatches[0], null);
6127
  }
6128
6129
  /**
6130
   * Returns a case swapped version of the string.
6131
   *
6132
   * @param string $str
6133
   * @param string $encoding
6134 1
   *
6135
   * @return string each character's case swapped
6136 1
   */
6137
  public static function swapCase($str, $encoding = 'UTF-8')
6138
  {
6139
    $str = (string)$str;
6140
6141
    if (!isset($str[0])) {
6142
      return '';
6143
    }
6144
6145
    if ($encoding !== 'UTF-8') {
6146
      $encoding = self::normalize_encoding($encoding);
6147
    }
6148 1
6149
    $str = self::clean($str);
6150 1
6151
    $strSwappedCase = preg_replace_callback(
6152
        '/[\S]/u',
6153
        function ($match) use ($encoding) {
6154
          $marchToUpper = UTF8::strtoupper($match[0], $encoding);
6155
6156
          if ($match[0] === $marchToUpper) {
6157
            return UTF8::strtolower($match[0], $encoding);
6158
          } else {
6159
            return $marchToUpper;
6160
          }
6161 13
        },
6162
        $str
6163 13
    );
6164
6165
    return $strSwappedCase;
6166 13
  }
6167
6168 13
  /**
6169 3
   * alias for "UTF8::to_ascii()"
6170
   *
6171
   * @see UTF8::to_ascii()
6172 11
   *
6173
   * @param string $s The input string e.g. a UTF-8 String
6174 11
   * @param string $subst_chr
6175 11
   *
6176
   * @return string
6177
   */
6178
  public static function toAscii($s, $subst_chr = '?')
6179
  {
6180
    return self::to_ascii($s, $subst_chr);
6181
  }
6182
6183
  /**
6184 11
   * alias for "UTF8::to_latin1()"
6185 11
   *
6186 11
   * @see UTF8::to_latin1()
6187
   *
6188 11
   * @param $str
6189
   *
6190 11
   * @return string
6191 11
   */
6192
  public static function toLatin1($str)
6193
  {
6194 5
    return self::to_latin1($str);
6195
  }
6196
6197 5
  /**
6198 5
   * alias for "UTF8::to_utf8()"
6199 5
   *
6200
   * @see UTF8::to_utf8()
6201 5
   *
6202 2
   * @param string $str
6203
   *
6204 2
   * @return string
6205 2
   */
6206 2
  public static function toUTF8($str)
6207
  {
6208 2
    return self::to_utf8($str);
6209 1
  }
6210
6211 1
  /**
6212 1
   * convert to ASCII
6213 1
   *
6214
   * @param string $str     The input string.
6215 1
   * @param string $unknown Character to use if a character is unknown. (default is ?)
6216
   *
6217
   * @return string
6218
   */
6219
  public static function to_ascii($str, $unknown = '?')
6220
  {
6221
    static $UTF8_TO_ASCII;
6222
6223
    // init
6224
    $str = (string)$str;
6225
6226
    if (!isset($str[0])) {
6227
      return '';
6228
    }
6229
6230 1
    $str = self::clean($str);
6231 2
6232
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
6233 5
      self::checkForSupport();
6234
    }
6235
6236
    if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
6237
      $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);
6238 5
6239
      // check again, if we only have ASCII, now ...
6240
      if (!preg_match("/[\x80-\xFF]/", $str)) {
6241
        return $str;
6242
      }
6243 5
    }
6244 5
6245 1
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
6246 1
    $chars = $ar[0];
6247
    foreach ($chars as &$c) {
6248 1
6249 1
      $ordC0 = ord($c[0]);
6250 1
6251
      if ($ordC0 >= 0 && $ordC0 <= 127) {
6252 1
        continue;
6253
      }
6254 5
6255 5
      $ordC1 = ord($c[1]);
6256 5
6257 5
      // ASCII - next please
6258 1
      if ($ordC0 >= 192 && $ordC0 <= 223) {
6259
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
6260 11
      }
6261
6262 11
      if ($ordC0 >= 224) {
6263
        $ordC2 = ord($c[2]);
6264
6265
        if ($ordC0 <= 239) {
6266
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
6267
        }
6268
6269
        if ($ordC0 >= 240) {
6270
          $ordC3 = ord($c[3]);
6271
6272
          if ($ordC0 <= 247) {
6273
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
6274 1
          }
6275
6276 1
          if ($ordC0 >= 248) {
6277
            $ordC4 = ord($c[4]);
6278
6279 View Code Duplication
            if ($ordC0 <= 251) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6280
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
6281
            }
6282
6283
            if ($ordC0 >= 252) {
6284
              $ordC5 = ord($c[5]);
6285
6286 View Code Duplication
              if ($ordC0 <= 253) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6287
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
6288 1
              }
6289
            }
6290 1
          }
6291
        }
6292
      }
6293
6294
      if ($ordC0 >= 254 && $ordC0 <= 255) {
6295
        $c = $unknown;
6296
        continue;
6297
      }
6298
6299
      if (!isset($ord)) {
6300
        $c = $unknown;
6301
        continue;
6302
      }
6303
6304
      $bank = $ord >> 8;
6305
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
6306
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
6307
        if (file_exists($bankfile)) {
6308
          /** @noinspection PhpIncludeInspection */
6309
          require $bankfile;
6310
        } else {
6311
          $UTF8_TO_ASCII[$bank] = array();
6312
        }
6313
      }
6314 20
6315
      $newchar = $ord & 255;
6316 20
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
6317 2
        $c = $UTF8_TO_ASCII[$bank][$newchar];
6318
      } else {
6319 2
        $c = $unknown;
6320 2
      }
6321
    }
6322 2
6323
    return implode('', $chars);
6324
  }
6325 20
6326
  /**
6327 20
   * alias for "UTF8::to_win1252()"
6328 4
   *
6329
   * @see UTF8::to_win1252()
6330
   *
6331 19
   * @param   string $str
6332 19
   *
6333
   * @return  array|string
6334
   */
6335 19
  public static function to_iso8859($str)
6336 19
  {
6337
    return self::to_win1252($str);
6338 19
  }
6339 19
6340 19
  /**
6341 19
   * alias for "UTF8::to_win1252()"
6342
   *
6343 19
   * @see UTF8::to_win1252()
6344
   *
6345 16
   * @param string|array $str
6346 16
   *
6347 16
   * @return string|array
6348 16
   */
6349 5
  public static function to_latin1($str)
6350 5
  {
6351 5
    return self::to_win1252($str);
6352
  }
6353
6354 19
  /**
6355
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
6356 17
   *
6357 13
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
6358 13
   *
6359 13
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
6360 8
   *
6361 8
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
6362 8
   *    are followed by any of these:  ("group B")
6363
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
6364
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
6365 19
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
6366
   * is also a valid unicode character, and will be left unchanged.
6367 9
   *
6368 4
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
6369 4
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
6370 4
   *
6371 6
   * @param string|array $str Any string or array.
6372 6
   *
6373 6
   * @return string The same string, but UTF8 encoded.
6374
   */
6375
  public static function to_utf8($str)
6376 9
  {
6377 6
    if (is_array($str)) {
6378 6
      foreach ($str as $k => $v) {
6379 6
        /** @noinspection AlterInForeachInspection */
6380
        $str[$k] = self::to_utf8($v);
6381
      }
6382 19
6383
      return $str;
6384 4
    }
6385 4
6386 2
    $str = (string)$str;
6387 2
6388 3
    if (!isset($str[0])) {
6389 3
      return $str;
6390 3
    }
6391
6392
    $max = strlen($str);
6393 4
    $buf = '';
6394 16
6395
    /** @noinspection ForeachInvariantsInspection */
6396 19
    for ($i = 0; $i < $max; $i++) {
6397
      $c1 = $str[$i];
6398 19
6399
      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already
6400
        $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
6401 19
        $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
6402 19
        $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];
6403
6404 3
        if ($c1 >= "\xc0" & $c1 <= "\xdf") { // looks like 2 bytes UTF8
6405 19
6406
          if ($c2 >= "\x80" && $c2 <= "\xbf") { // yeah, almost sure it's UTF8 already
6407 19
            $buf .= $c1 . $c2;
6408
            $i++;
6409
          } else { // not valid UTF8 - convert it
6410 19
            $cc1 = (chr(ord($c1) / 64) | "\xc0");
6411 19
            $cc2 = ($c1 & "\x3f") | "\x80";
6412 19
            $buf .= $cc1 . $cc2;
6413 2
          }
6414 19
6415 View Code Duplication
        } elseif ($c1 >= "\xe0" & $c1 <= "\xef") { // looks like 3 bytes UTF8
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6416 19
6417
          if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf") { // yeah, almost sure it's UTF8 already
6418 19
            $buf .= $c1 . $c2 . $c3;
6419
            $i += 2;
6420
          } else { // not valid UTF8 - convert it
6421
            $cc1 = (chr(ord($c1) / 64) | "\xc0");
6422
            $cc2 = ($c1 & "\x3f") | "\x80";
6423
            $buf .= $cc1 . $cc2;
6424
          }
6425
6426
        } elseif ($c1 >= "\xf0" & $c1 <= "\xf7") { // looks like 4 bytes UTF8
6427
6428 2 View Code Duplication
          if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf") { // yeah, almost sure it's UTF8 already
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6429
            $buf .= $c1 . $c2 . $c3 . $c4;
6430 2
            $i += 3;
6431
          } else { // not valid UTF8 - convert it
6432 1
            $cc1 = (chr(ord($c1) / 64) | "\xc0");
6433
            $cc2 = ($c1 & "\x3f") | "\x80";
6434 1
            $buf .= $cc1 . $cc2;
6435 1
          }
6436
6437 1
        } else { // doesn't look like UTF8, but should be converted
6438
          $cc1 = (chr(ord($c1) / 64) | "\xc0");
6439
          $cc2 = (($c1 & "\x3f") | "\x80");
6440 2
          $buf .= $cc1 . $cc2;
6441
        }
6442 2
6443 1
      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion
6444
6445
        $ordC1 = ord($c1);
6446 2
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
6447
          $buf .= self::$win1252ToUtf8[$ordC1];
6448
        } else {
6449
          $cc1 = (chr($ordC1 / 64) | "\xc0");
6450
          $cc2 = (($c1 & "\x3f") | "\x80");
6451
          $buf .= $cc1 . $cc2;
6452
        }
6453
6454
      } else { // it doesn't need conversion
6455
        $buf .= $c1;
6456
      }
6457
    }
6458
6459
    // decode unicode escape sequences
6460
    $buf = preg_replace_callback(
6461
        '/\\\\u([0-9a-f]{4})/i',
6462 26
        function ($match) {
6463
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
6464 26
        },
6465
        $buf
6466 26
    );
6467 5
6468
    // decode UTF-8 codepoints
6469
    $buf = preg_replace_callback(
6470
        '/&#\d{2,4};/',
6471 22
        function ($match) {
6472 6
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
6473
        },
6474
        $buf
6475 16
    );
6476
6477
    return $buf;
6478
  }
6479
6480
  /**
6481
   * Convert a string into "win1252"-encoding.
6482
   *
6483
   * @param  string|array $str
6484
   *
6485 14
   * @return string|array
6486
   */
6487 14
  private static function to_win1252($str)
6488
  {
6489
    if (is_array($str)) {
6490
6491
      foreach ($str as $k => $v) {
6492
        /** @noinspection AlterInForeachInspection */
6493
        $str[$k] = self::to_win1252($v);
6494
      }
6495
6496
      return $str;
6497
    }
6498
6499 1
    $str = (string)$str;
6500
6501 1
    if (!isset($str[0])) {
6502
      return '';
6503
    }
6504
6505
    return self::utf8_decode($str);
6506
  }
6507
6508
  /**
6509
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
6510
   *
6511
   * INFO: This is slower than "trim()"
6512 8
   *
6513
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
6514 8
   * but the check for ASCII (7-Bit) costs more time than we can save here.
6515 2
   *
6516
   * @param    string $str   The string to be trimmed
6517
   * @param    string $chars Optional characters to be stripped
6518
   *
6519 7
   * @return   string The trimmed string
6520 7
   */
6521
  public static function trim($str = '', $chars = INF)
6522 7
  {
6523 1
    $str = (string)$str;
6524 1
6525 7
    if (!isset($str[0])) {
6526
      return '';
6527
    }
6528 7
6529
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
6530 7
    if ($chars === INF || !$chars) {
6531
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
6532
    }
6533
6534 1
    return self::rtrim(self::ltrim($str, $chars), $chars);
6535 1
  }
6536 1
6537 7
  /**
6538 7
   * Makes string's first char uppercase.
6539 7
   *
6540 7
   * @param    string $str The input string
6541 7
   *
6542
   * @return   string The resulting string
6543 7
   */
6544
  public static function ucfirst($str)
6545
  {
6546
    return self::strtoupper(self::substr($str, 0, 1)) . self::substr($str, 1);
0 ignored issues
show
Security Bug introduced by
It seems like self::substr($str, 0, 1) targeting voku\helper\UTF8::substr() can also be of type false; however, voku\helper\UTF8::strtoupper() does only seem to accept string, did you maybe forget to handle an error condition?
Loading history...
6547
  }
6548
6549
  /**
6550
   * alias for "UTF8::ucfirst()"
6551
   *
6552
   * @see UTF8::ucfirst()
6553
   *
6554
   * @param string $word
6555
   *
6556
   * @return string
6557
   */
6558
  public static function ucword($word)
6559
  {
6560
    return self::ucfirst($word);
6561
  }
6562
6563 1
  /**
6564
   * Uppercase for all words in the string.
6565 1
   *
6566
   * @param  string $str
6567 1
   * @param array   $exceptions
6568 1
   *
6569
   * @return string
6570
   */
6571 1
  public static function ucwords($str, $exceptions = array())
6572
  {
6573 1
    if (!$str) {
6574
      return '';
6575 1
    }
6576 1
6577 1
    // init
6578 1
    $words = explode(' ', $str);
6579
    $newwords = array();
6580 1
6581 1
    if (count($exceptions) > 0) {
6582 1
      $useExceptions = true;
6583
    } else {
6584 1
      $useExceptions = false;
6585
    }
6586
6587
    foreach ($words as $word) {
6588
      if (
6589
          ($useExceptions === false)
6590
          ||
6591
          (
6592 1
              $useExceptions === true
6593
              &&
6594
              !in_array($word, $exceptions, true)
6595
          )
6596
      ) {
6597
        $word = self::ucfirst($word);
6598
      }
6599
      $newwords[] = $word;
6600
    }
6601
6602
    return self::ucfirst(implode(' ', $newwords));
6603
  }
6604
6605
  /**
6606
   * Multi decode html entity & fix urlencoded-win1252-chars.
6607
   *
6608
   * e.g:
6609
   * 'D&#252;sseldorf'               => 'Düsseldorf'
6610
   * 'D%FCsseldorf'                  => 'Düsseldorf'
6611
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
6612
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
6613
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
6614
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
6615
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
6616
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
6617
   *
6618
   * @param string $str
6619
   *
6620
   * @return string
6621
   */
6622
  public static function urldecode($str)
6623
  {
6624
    $str = (string)$str;
6625
6626
    if (!isset($str[0])) {
6627
      return '';
6628
    }
6629
6630
    $str = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', urldecode($str));
6631
6632
    $flags = Bootup::is_php('5.4') ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;
6633
6634
    $str = self::fix_simple_utf8(
6635
        rawurldecode(
6636
            self::html_entity_decode(
6637
                self::to_utf8($str),
0 ignored issues
show
Bug introduced by
It seems like self::to_utf8($str) targeting voku\helper\UTF8::to_utf8() can also be of type array; however, voku\helper\UTF8::html_entity_decode() does only seem to accept string, maybe add an additional type check?

This check looks at variables that are passed out again to other methods.

If the outgoing method call has stricter type requirements than the method itself, an issue is raised.

An additional type check may prevent trouble.

Loading history...
6638
                $flags
6639
            )
6640
        )
6641
    );
6642
6643
    return (string)$str;
6644
  }
6645
6646
  /**
6647
   * Return an array with "urlencoded"-win1252 -> UTF-8
6648
   *
6649
   * @return mixed
6650
   */
6651
  public static function urldecode_fix_win1252_chars()
6652
  {
6653
    static $array = array(
6654
        '%20' => ' ',
6655
        '%21' => '!',
6656
        '%22' => '"',
6657
        '%23' => '#',
6658
        '%24' => '$',
6659
        '%25' => '%',
6660
        '%26' => '&',
6661
        '%27' => "'",
6662
        '%28' => '(',
6663
        '%29' => ')',
6664
        '%2A' => '*',
6665
        '%2B' => '+',
6666
        '%2C' => ',',
6667
        '%2D' => '-',
6668
        '%2E' => '.',
6669
        '%2F' => '/',
6670
        '%30' => '0',
6671
        '%31' => '1',
6672
        '%32' => '2',
6673
        '%33' => '3',
6674
        '%34' => '4',
6675
        '%35' => '5',
6676
        '%36' => '6',
6677
        '%37' => '7',
6678
        '%38' => '8',
6679
        '%39' => '9',
6680
        '%3A' => ':',
6681
        '%3B' => ';',
6682
        '%3C' => '<',
6683
        '%3D' => '=',
6684
        '%3E' => '>',
6685
        '%3F' => '?',
6686
        '%40' => '@',
6687
        '%41' => 'A',
6688
        '%42' => 'B',
6689
        '%43' => 'C',
6690
        '%44' => 'D',
6691
        '%45' => 'E',
6692
        '%46' => 'F',
6693
        '%47' => 'G',
6694
        '%48' => 'H',
6695
        '%49' => 'I',
6696
        '%4A' => 'J',
6697
        '%4B' => 'K',
6698
        '%4C' => 'L',
6699
        '%4D' => 'M',
6700
        '%4E' => 'N',
6701
        '%4F' => 'O',
6702
        '%50' => 'P',
6703
        '%51' => 'Q',
6704
        '%52' => 'R',
6705
        '%53' => 'S',
6706
        '%54' => 'T',
6707
        '%55' => 'U',
6708
        '%56' => 'V',
6709
        '%57' => 'W',
6710
        '%58' => 'X',
6711
        '%59' => 'Y',
6712
        '%5A' => 'Z',
6713
        '%5B' => '[',
6714
        '%5C' => '\\',
6715
        '%5D' => ']',
6716
        '%5E' => '^',
6717
        '%5F' => '_',
6718
        '%60' => '`',
6719
        '%61' => 'a',
6720
        '%62' => 'b',
6721
        '%63' => 'c',
6722
        '%64' => 'd',
6723
        '%65' => 'e',
6724
        '%66' => 'f',
6725
        '%67' => 'g',
6726
        '%68' => 'h',
6727
        '%69' => 'i',
6728
        '%6A' => 'j',
6729
        '%6B' => 'k',
6730
        '%6C' => 'l',
6731
        '%6D' => 'm',
6732
        '%6E' => 'n',
6733
        '%6F' => 'o',
6734
        '%70' => 'p',
6735
        '%71' => 'q',
6736
        '%72' => 'r',
6737
        '%73' => 's',
6738
        '%74' => 't',
6739
        '%75' => 'u',
6740
        '%76' => 'v',
6741
        '%77' => 'w',
6742
        '%78' => 'x',
6743
        '%79' => 'y',
6744
        '%7A' => 'z',
6745
        '%7B' => '{',
6746
        '%7C' => '|',
6747
        '%7D' => '}',
6748
        '%7E' => '~',
6749
        '%7F' => '',
6750
        '%80' => '`',
6751
        '%81' => '',
6752
        '%82' => '‚',
6753
        '%83' => 'ƒ',
6754
        '%84' => '„',
6755
        '%85' => '…',
6756
        '%86' => '†',
6757
        '%87' => '‡',
6758
        '%88' => 'ˆ',
6759
        '%89' => '‰',
6760
        '%8A' => 'Š',
6761
        '%8B' => '‹',
6762
        '%8C' => 'Œ',
6763
        '%8D' => '',
6764
        '%8E' => 'Ž',
6765
        '%8F' => '',
6766
        '%90' => '',
6767
        '%91' => '‘',
6768
        '%92' => '’',
6769
        '%93' => '“',
6770
        '%94' => '”',
6771
        '%95' => '•',
6772
        '%96' => '–',
6773
        '%97' => '—',
6774
        '%98' => '˜',
6775
        '%99' => '™',
6776
        '%9A' => 'š',
6777
        '%9B' => '›',
6778
        '%9C' => 'œ',
6779
        '%9D' => '',
6780
        '%9E' => 'ž',
6781
        '%9F' => 'Ÿ',
6782
        '%A0' => '',
6783
        '%A1' => '¡',
6784
        '%A2' => '¢',
6785
        '%A3' => '£',
6786
        '%A4' => '¤',
6787
        '%A5' => '¥',
6788
        '%A6' => '¦',
6789
        '%A7' => '§',
6790
        '%A8' => '¨',
6791
        '%A9' => '©',
6792
        '%AA' => 'ª',
6793
        '%AB' => '«',
6794
        '%AC' => '¬',
6795
        '%AD' => '',
6796
        '%AE' => '®',
6797
        '%AF' => '¯',
6798
        '%B0' => '°',
6799
        '%B1' => '±',
6800
        '%B2' => '²',
6801
        '%B3' => '³',
6802
        '%B4' => '´',
6803
        '%B5' => 'µ',
6804
        '%B6' => '¶',
6805
        '%B7' => '·',
6806
        '%B8' => '¸',
6807
        '%B9' => '¹',
6808
        '%BA' => 'º',
6809
        '%BB' => '»',
6810
        '%BC' => '¼',
6811
        '%BD' => '½',
6812
        '%BE' => '¾',
6813
        '%BF' => '¿',
6814
        '%C0' => 'À',
6815
        '%C1' => 'Á',
6816
        '%C2' => 'Â',
6817
        '%C3' => 'Ã',
6818
        '%C4' => 'Ä',
6819 1
        '%C5' => 'Å',
6820
        '%C6' => 'Æ',
6821 1
        '%C7' => 'Ç',
6822
        '%C8' => 'È',
6823
        '%C9' => 'É',
6824
        '%CA' => 'Ê',
6825
        '%CB' => 'Ë',
6826
        '%CC' => 'Ì',
6827
        '%CD' => 'Í',
6828
        '%CE' => 'Î',
6829
        '%CF' => 'Ï',
6830
        '%D0' => 'Ð',
6831 6
        '%D1' => 'Ñ',
6832
        '%D2' => 'Ò',
6833 6
        '%D3' => 'Ó',
6834 6
        '%D4' => 'Ô',
6835
        '%D5' => 'Õ',
6836 6
        '%D6' => 'Ö',
6837
        '%D7' => '×',
6838 6
        '%D8' => 'Ø',
6839 3
        '%D9' => 'Ù',
6840
        '%DA' => 'Ú',
6841
        '%DB' => 'Û',
6842
        '%DC' => 'Ü',
6843 6
        '%DD' => 'Ý',
6844
        '%DE' => 'Þ',
6845 6
        '%DF' => 'ß',
6846
        '%E0' => 'à',
6847 6
        '%E1' => 'á',
6848 1
        '%E2' => 'â',
6849 1
        '%E3' => 'ã',
6850 1
        '%E4' => 'ä',
6851
        '%E5' => 'å',
6852 6
        '%E6' => 'æ',
6853
        '%E7' => 'ç',
6854
        '%E8' => 'è',
6855
        '%E9' => 'é',
6856
        '%EA' => 'ê',
6857
        '%EB' => 'ë',
6858
        '%EC' => 'ì',
6859
        '%ED' => 'í',
6860
        '%EE' => 'î',
6861
        '%EF' => 'ï',
6862 6
        '%F0' => 'ð',
6863
        '%F1' => 'ñ',
6864 6
        '%F2' => 'ò',
6865
        '%F3' => 'ó',
6866 6
        '%F4' => 'ô',
6867 6
        '%F5' => 'õ',
6868
        '%F6' => 'ö',
6869
        '%F7' => '÷',
6870 5
        '%F8' => 'ø',
6871 5
        '%F9' => 'ù',
6872
        '%FA' => 'ú',
6873 5
        '%FB' => 'û',
6874 1
        '%FC' => 'ü',
6875 1
        '%FD' => 'ý',
6876 1
        '%FE' => 'þ',
6877
        '%FF' => 'ÿ',
6878 5
    );
6879
6880
    return $array;
6881
  }
6882
6883
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is passed through "UTF8::to_utf8()" first, then every key of the
   * "$utf8ToWin1252" table is replaced by its mapped value and the result is
   * handed to "Xml::utf8_decode()".
   *
   * @param string $str <p>The string to decode (it is cast to string before use).</p>
   *
   * @return string <p>The decoded string, or an empty string if the input is empty.</p>
   */
  public static function utf8_decode($str)
  {
    // search / replace lists are derived from the table once and kept for later calls
    static $search = null;
    static $replace = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $normalized = self::to_utf8($str);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $normalized);

    return Xml::utf8_decode($mapped);
  }
6911
6912 1
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The native "\utf8_encode()" does the conversion; afterwards the keys of the
   * "$cp1252ToUtf8" table are replaced by their mapped values.
   *
   * @param string $str <p>The string to encode.</p>
   *
   * @return string <p>The encoded string.</p>
   */
  public static function utf8_encode($str)
  {
    // search / replace lists are derived from the table once and kept for later calls
    static $search = null;
    static $replace = null;

    $encoded = \utf8_encode($str);

    // fast path: the replacement table is only consulted when a "\xC2" byte is present
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6938 1
6939
  /**
   * fix -> utf8-win1252 chars
   *
   * Use this if you received an UTF-8 string that was converted from Windows-1252 as if it
   * were ISO-8859-1 (i.e. the Windows-1252 chars from 80 to 9F were ignored).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to "UTF8::fix_simple_utf8()".
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str <p>The string to fix.</p>
   *
   * @return  string <p>Whatever "UTF8::fix_simple_utf8()" returns for the given input.</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6956
6957
  /**
   * Returns the table of utf8 whitespace characters held in "$whitespaceTable".
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6971
6972
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string holds more than
   * $words of them, it is cut after the last allowed word, trailing whitespace is
   * removed and $strAddOn is appended. Otherwise the string is returned as it is.
   *
   * @param  string $str      <p>The input string (it is cast to string before use).</p>
   * @param  int    $words    <p>Maximum number of words to keep; values below 1 yield an empty string.</p>
   * @param  string $strAddOn <p>Suffix that is appended only when the string was actually shortened.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $words = (int)$words;
    if ($words < 1) {
      return '';
    }

    // optional leading whitespace, then 1..$words groups of "word + following whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    // shorten only when there is a match and it does not already cover the whole string
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7007 8
7008 8
  /**
   * Wraps a string to a given number of characters.
   *
   * The native "wordwrap()" is not run on the text itself but on a one-byte-per-character
   * stand-in ("mask") of it; the break positions found in the mask are then applied to
   * the list of real characters produced by "UTF8::split()".
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>
   *                      The input string.
   *                      </p>
   * @param int    $width [optional] <p>
   *                      The column width.
   *                      </p>
   * @param string $break [optional] <p>
   *                      The string that is used to break the line.
   *                      </p>
   * @param bool   $cut   [optional] <p>
   *                      If set to true, the string is always wrapped at or before the
   *                      specified width, so a word that is larger than the given width
   *                      is broken apart.
   *                      </p>
   *
   * @return string the given string wrapped at the specified column; an empty string if
   *                $str or $break is empty.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ($str === '' || $break === '') {
      return '';
    }

    $segments = explode($break, $str);
    $segmentCount = count($segments);

    if ($segmentCount === 1 && $segments[0] === '') {
      return '';
    }

    // Build two parallel views of the input:
    //   $chars - the real pieces (characters, plus one entry per existing break)
    //   $mask  - one byte per entry: '#' for a break, ' ' for a space, '?' for anything else
    $chars = array();
    $mask = '';
    for ($n = 0; $n < $segmentCount; ++$n) {

      if ($n > 0) {
        // segments after the first one were preceded by a break in the input
        $chars[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$n];
      unset($segments[$n]);

      foreach (self::split($segment) as $char) {
        $chars[] = $char;

        if ($char === ' ') {
          $mask .= ' ';
        } else {
          $mask .= '?';
        }
      }
    }

    // let the native function decide where the breaks go, using '#' as the break marker
    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;  // cursor in $chars
    $maskIndex = -1; // cursor in $mask
    $breakPos = self::strpos($mask, '#', 0);

    while ($breakPos !== false) {
      // copy everything in front of the break marker; the pre-increment steps over
      // the marker that ended the previous round
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // a marker that stands for an existing break or a space consumes that entry,
      // a marker that was inserted into a word consumes nothing
      if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;

      $breakPos = self::strpos($mask, '#', $breakPos + 1);
    }

    // whatever is left after the last break is appended unchanged
    return $wrapped . implode('', $chars);
  }
7087
7088
  /**
   * Returns the Unicode White Space characters held in "$whitespace".
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7097
7098
}
7099