Completed
Push — master ( 54b9fe...59c8a4 )
by Lars
10:53
created

UTF8::hasBom()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 4
ccs 0
cts 0
cp 0
rs 10
cc 1
eloc 2
nc 1
nop 1
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
   * Bom => Byte-Length
   *
   * Maps every recognised byte order mark to the number of bytes it occupies.
   * The 'as "WINDOWS-1252"' entries are the mojibake form a BOM takes after its
   * raw bytes were read as WINDOWS-1252 and re-encoded as UTF-8, which is why
   * they are longer than the real BOM.
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  protected static $bom = array(
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // Written as byte escapes (the characters "ï", "»", "¿" encoded as UTF-8) so the key
      // cannot be silently stripped or re-encoded by editors and tooling.
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  protected static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    //HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  protected static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  protected static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
232
   * @var array
233
   */
234
  protected static $brokenUtf8ToUtf8 = array(
235
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
236
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
237
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
238
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
239
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
240
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
241
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
242
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
243
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
244
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
245
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
246
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
247
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
248
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
249
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
250
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
251
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
252
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
253
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
254
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
255
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
256
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
257
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
258
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
259
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
260
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
261
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
262
      'ü'       => 'ü',
263
      'ä'       => 'ä',
264
      'ö'       => 'ö',
265
      'Ö'       => 'Ö',
266
      'ß'       => 'ß',
267
      'Ã '       => 'à',
268
      'á'       => 'á',
269
      'â'       => 'â',
270
      'ã'       => 'ã',
271
      'ù'       => 'ù',
272
      'ú'       => 'ú',
273
      'û'       => 'û',
274
      'Ù'       => 'Ù',
275
      'Ú'       => 'Ú',
276
      'Û'       => 'Û',
277
      'Ü'       => 'Ü',
278
      'ò'       => 'ò',
279
      'ó'       => 'ó',
280
      'ô'       => 'ô',
281
      'è'       => 'è',
282
      'é'       => 'é',
283
      'ê'       => 'ê',
284
      'ë'       => 'ë',
285
      'À'       => 'À',
286
      'Á'       => 'Á',
287
      'Â'       => 'Â',
288
      'Ã'       => 'Ã',
289
      'Ä'       => 'Ä',
290
      'Ã…'       => 'Å',
291
      'Ç'       => 'Ç',
292
      'È'       => 'È',
293
      'É'       => 'É',
294
      'Ê'       => 'Ê',
295
      'Ë'       => 'Ë',
296
      'ÃŒ'       => 'Ì',
297
      'Í'       => 'Í',
298
      'ÃŽ'       => 'Î',
299
      'Ï'       => 'Ï',
300
      'Ñ'       => 'Ñ',
301
      'Ã’'       => 'Ò',
302
      'Ó'       => 'Ó',
303
      'Ô'       => 'Ô',
304
      'Õ'       => 'Õ',
305
      'Ø'       => 'Ø',
306
      'Ã¥'       => 'å',
307
      'æ'       => 'æ',
308
      'ç'       => 'ç',
309
      'ì'       => 'ì',
310
      'í'       => 'í',
311
      'î'       => 'î',
312
      'ï'       => 'ï',
313
      'ð'       => 'ð',
314
      'ñ'       => 'ñ',
315
      'õ'       => 'õ',
316
      'ø'       => 'ø',
317
      'ý'       => 'ý',
318
      'ÿ'       => 'ÿ',
319
      '€'      => '€',
320
  );
321
322
  /**
323
   * @var array
324
   */
325
  protected static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  protected static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  protected static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790 1
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792 1
      'ISO-IR-230',
793 1
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803 1
804
  /**
   * Creates a new helper instance.
   *
   * The only work done here is a call to UTF8::checkForSupport(), which is
   * defined elsewhere in this class.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
811
812
  /**
   * Return the character at the specified position: $str[1] like functionality.
   *
   * Delegates to UTF8::substr() with a length of exactly one character, so the
   * position is counted the same way UTF8::substr() counts it.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of character to return.
   *
   * @return   string Single Multi-Byte character.
   */
  public static function access($str, $pos)
  {
    return self::substr($str, $pos, 1);
  }
824
825
  /**
   * Prepends the UTF-8 BOM to the string and returns the whole string.
   *
   * INFO: If UTF8::string_has_bom() does not return false for the input (a BOM
   *       is already there), the input string is returned unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that contains BOM
   */
  public static function add_bom_to_string($str)
  {
    // Only an explicit "false" from the check leads to a BOM being added.
    if (self::string_has_bom($str) === false) {
      $str = self::bom() . $str;
    }

    return $str;
  }
842
843
  /**
   * Convert a sequence of binary digits back into the raw string it encodes.
   *
   * The digits are read as one bit stream whose right-most 8 bits form the last
   * output byte. Input whose length is not a multiple of 8 is left-padded with
   * "0", so binary written without leading zeros (e.g. "1100001" for "a") is
   * decoded correctly.
   *
   * Decoding is done one byte at a time instead of through base_convert(),
   * which loses precision on long inputs and could produce an odd-length hex
   * string that pack('H*') pads on the wrong side (e.g. "1" became "\x10").
   *
   * @param mixed $bin A sequence of "1" and "0" digits; any other character is ignored.
   *
   * @return string The decoded bytes, or an empty string if $bin holds no binary digit.
   */
  public static function binary_to_str($bin)
  {
    // Keep only real binary digits, everything else is dropped before decoding.
    $bits = preg_replace('/[^01]/', '', (string)$bin);
    if ($bits === null || $bits === '') {
      return '';
    }

    // Left-pad to a whole number of bytes so the right-most 8 bits stay together.
    $missing = (8 - strlen($bits) % 8) % 8;
    if ($missing > 0) {
      $bits = str_repeat('0', $missing) . $bits;
    }

    // Each 8-bit group is at most 255, so bindec() + chr() is exact for any input length.
    $str = '';
    foreach (str_split($bits, 8) as $octet) {
      $str .= chr(bindec($octet));
    }

    return $str;
  }
854
855
  /**
   * Returns the UTF-8 Byte Order Mark.
   *
   * @return string The three bytes 0xEF 0xBB 0xBF.
   */
  public static function bom()
  {
    return "\xEF\xBB\xBF";
  }
864
865
  /**
   * Alias of UTF8::chr_map(): forwards both arguments unchanged and returns
   * whatever UTF8::chr_map() returns.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    return self::chr_map($callback, $str);
  }
878
879
  /**
880
   * Returns an array of all lower and upper case UTF-8 encoded characters.
881
   *
882
   * @return   array An array with lower case chars as keys and upper case chars as values.
883
   */
884
  protected static function case_table()
885
  {
886
    static $case = array(
887
888
      // lower => upper
889
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
890
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
891
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
892
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
893
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
894
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
895
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
896
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
897
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
898
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
899
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
900
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
901
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
902
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
903
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
904
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
905
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
906
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
907
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
908
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
909
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
910
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
911
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
912
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
913
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
914
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
915
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
916
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
917
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
918
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
919
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
920
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
921
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
922
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
923
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
924
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
925
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
926
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
927
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
928
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
929
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
930
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
931
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
932
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
933
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
934
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
935
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
936
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
937
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
938
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
939
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
940
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
941
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
942
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
943
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
944
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
945
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
946
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
947
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
948
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
949
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
950
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
951
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
952
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
953
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
954
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
955
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
956
      "\xea\x9e\x87"     => "\xea\x9e\x86",
957
      "\xea\x9e\x85"     => "\xea\x9e\x84",
958
      "\xea\x9e\x83"     => "\xea\x9e\x82",
959
      "\xea\x9e\x81"     => "\xea\x9e\x80",
960
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
961
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
962
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
963
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
964
      "\xea\x9d\xad"     => "\xea\x9d\xac",
965
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
966
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
967
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
968
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
969
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
970
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
971
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
972
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
973
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
974
      "\xea\x9d\x99"     => "\xea\x9d\x98",
975
      "\xea\x9d\x97"     => "\xea\x9d\x96",
976
      "\xea\x9d\x95"     => "\xea\x9d\x94",
977
      "\xea\x9d\x93"     => "\xea\x9d\x92",
978
      "\xea\x9d\x91"     => "\xea\x9d\x90",
979
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
980
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
981
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
982
      "\xea\x9d\x89"     => "\xea\x9d\x88",
983
      "\xea\x9d\x87"     => "\xea\x9d\x86",
984
      "\xea\x9d\x85"     => "\xea\x9d\x84",
985
      "\xea\x9d\x83"     => "\xea\x9d\x82",
986
      "\xea\x9d\x81"     => "\xea\x9d\x80",
987
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
988
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
989
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
990
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
991
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
992
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
993
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
994
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
995
      "\xea\x9c\xad"     => "\xea\x9c\xac",
996
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
997
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
998
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
999
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
1000
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
1001
      "\xea\x9a\x97"     => "\xea\x9a\x96",
1002
      "\xea\x9a\x95"     => "\xea\x9a\x94",
1003
      "\xea\x9a\x93"     => "\xea\x9a\x92",
1004
      "\xea\x9a\x91"     => "\xea\x9a\x90",
1005
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
1006
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
1007
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
1008
      "\xea\x9a\x89"     => "\xea\x9a\x88",
1009
      "\xea\x9a\x87"     => "\xea\x9a\x86",
1010
      "\xea\x9a\x85"     => "\xea\x9a\x84",
1011
      "\xea\x9a\x83"     => "\xea\x9a\x82",
1012
      "\xea\x9a\x81"     => "\xea\x9a\x80",
1013
      "\xea\x99\xad"     => "\xea\x99\xac",
1014
      "\xea\x99\xab"     => "\xea\x99\xaa",
1015
      "\xea\x99\xa9"     => "\xea\x99\xa8",
1016
      "\xea\x99\xa7"     => "\xea\x99\xa6",
1017
      "\xea\x99\xa5"     => "\xea\x99\xa4",
1018
      "\xea\x99\xa3"     => "\xea\x99\xa2",
1019
      "\xea\x99\x9f"     => "\xea\x99\x9e",
1020
      "\xea\x99\x9d"     => "\xea\x99\x9c",
1021
      "\xea\x99\x9b"     => "\xea\x99\x9a",
1022
      "\xea\x99\x99"     => "\xea\x99\x98",
1023
      "\xea\x99\x97"     => "\xea\x99\x96",
1024
      "\xea\x99\x95"     => "\xea\x99\x94",
1025
      "\xea\x99\x93"     => "\xea\x99\x92",
1026
      "\xea\x99\x91"     => "\xea\x99\x90",
1027
      "\xea\x99\x8f"     => "\xea\x99\x8e",
1028
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1029
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1030
      "\xea\x99\x89"     => "\xea\x99\x88",
1031
      "\xea\x99\x87"     => "\xea\x99\x86",
1032
      "\xea\x99\x85"     => "\xea\x99\x84",
1033
      "\xea\x99\x83"     => "\xea\x99\x82",
1034
      "\xea\x99\x81"     => "\xea\x99\x80",
1035
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1036
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1037
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1038
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1039
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1040
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1041
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1042
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1043
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1044
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1045
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1046
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1047
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1048
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1049
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1050
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1051
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1052
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1053
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1054
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1055
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1056
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1057
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1058
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1059
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1060
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1061
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1062
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1063
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1064
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1065
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1066
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1067
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1068
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1069
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1070
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1071
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1072
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1073
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1074
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1075
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1076
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1077
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1078
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1079
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1080
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1081
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1082
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1083
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1084
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1085
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1086
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1087
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1088
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1089
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1090
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1091
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1092
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1093
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1094
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1095
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1096
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1097
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1098
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1099
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1100
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1101
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1102
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1103
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1104
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1105
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1106
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1107
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1108
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1109
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1110
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1111
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1112
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1113
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1114
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1115
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1116
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1117
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1118
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1119
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1120
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1121
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1122
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1123
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1124
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1125
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1126
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1127
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1128
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1129
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1130
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1131
      "\xe2\xb1\xa5"     => "\xc8\xba",
1132
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1133
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1134
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1135
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1136
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1137
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1138
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1139
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1140
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1141
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1142
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1143
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1144
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1145
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1146
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1147
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1148
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1149
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1150
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1151
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1152
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1153
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1154
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1155
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1156
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1157
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1158
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1159
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1160
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1161
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1162
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1163
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1164
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1165
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1166
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1167
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1168
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1169
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1170
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1171
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1172
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1173
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1174
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1175
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1176
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1177
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1178
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1179
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1180
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1181
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1182
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1183
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1184
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1185
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1186
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1187
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1188
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1189
      "\xe1\xbe\xbe"     => "\xce\x99",
1190
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1191
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1192
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1193
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1194
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1195
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1196
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1197
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1198
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1199
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1200
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1201
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1202
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1203
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1204
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1205
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1206
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1207
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1208
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1209
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1210
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1211
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1212
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1213
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1214
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1215
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1216
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1217
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1218
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1219
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1220
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1221
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1222
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1223
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1224
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1225
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1226
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1227
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1228
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1229
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1230
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1231
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1232
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1233
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1234
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1235
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1236
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1237
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1238
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1239
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1240
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1241
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1242
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1243
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1244
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1245
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1246
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1247
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1248
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1249
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1250
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1251
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1252
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1253
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1254
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1255
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1256
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1257
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1258
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1259
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1260
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1261
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1262
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1263
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1264
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1265
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1266
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1267
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1268
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1269
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1270
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1271
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1272
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1273
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1274
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1275
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1276
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1277
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1278
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1279
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1280
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1281
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1282
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1283
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1284
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1285
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1286
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1287
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1288
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1289
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1290
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1291
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1292
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1293
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1294
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1295
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1296
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1297
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1298
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1299
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1300
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1301
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1302
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1303
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1304
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1305
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1306
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1307
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1308
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1309
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1310
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1311
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1312
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1313
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1314
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1315
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1316
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1317
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1318
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1319
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1320
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1321
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1322
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1323
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1324
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1325
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1326
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1327
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1328
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1329
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1330
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1331
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1332
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1333
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1334
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1335
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1336
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1337
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1338
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1339
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1340
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1341
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1342
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1343
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1344
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1345
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1346
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1347
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1348
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1349
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1350
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1351
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1352
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1353
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1354
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1355
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1356
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1357
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1358
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1359
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1360
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1361
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1362
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1363
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1364
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1365
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1366
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1367
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1368
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1369
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1370
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1371
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1372
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1373
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1374
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1375
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1376
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1377
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1378
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1379
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1380
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1381
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1382
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1383
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1384
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1385
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1386
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1387
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1388
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1389
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1390
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1391
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1392
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1393
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1394
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1395
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1396
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1397
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1398
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1399
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1400
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1401
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1402
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1403
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1404
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1405
      "\xd6\x86"         => "\xd5\x96",
1406
      "\xd6\x85"         => "\xd5\x95",
1407
      "\xd6\x84"         => "\xd5\x94",
1408
      "\xd6\x83"         => "\xd5\x93",
1409
      "\xd6\x82"         => "\xd5\x92",
1410
      "\xd6\x81"         => "\xd5\x91",
1411
      "\xd6\x80"         => "\xd5\x90",
1412
      "\xd5\xbf"         => "\xd5\x8f",
1413
      "\xd5\xbe"         => "\xd5\x8e",
1414
      "\xd5\xbd"         => "\xd5\x8d",
1415
      "\xd5\xbc"         => "\xd5\x8c",
1416
      "\xd5\xbb"         => "\xd5\x8b",
1417
      "\xd5\xba"         => "\xd5\x8a",
1418
      "\xd5\xb9"         => "\xd5\x89",
1419
      "\xd5\xb8"         => "\xd5\x88",
1420
      "\xd5\xb7"         => "\xd5\x87",
1421
      "\xd5\xb6"         => "\xd5\x86",
1422
      "\xd5\xb5"         => "\xd5\x85",
1423
      "\xd5\xb4"         => "\xd5\x84",
1424
      "\xd5\xb3"         => "\xd5\x83",
1425
      "\xd5\xb2"         => "\xd5\x82",
1426
      "\xd5\xb1"         => "\xd5\x81",
1427
      "\xd5\xb0"         => "\xd5\x80",
1428
      "\xd5\xaf"         => "\xd4\xbf",
1429
      "\xd5\xae"         => "\xd4\xbe",
1430
      "\xd5\xad"         => "\xd4\xbd",
1431
      "\xd5\xac"         => "\xd4\xbc",
1432
      "\xd5\xab"         => "\xd4\xbb",
1433
      "\xd5\xaa"         => "\xd4\xba",
1434
      "\xd5\xa9"         => "\xd4\xb9",
1435
      "\xd5\xa8"         => "\xd4\xb8",
1436
      "\xd5\xa7"         => "\xd4\xb7",
1437
      "\xd5\xa6"         => "\xd4\xb6",
1438
      "\xd5\xa5"         => "\xd4\xb5",
1439
      "\xd5\xa4"         => "\xd4\xb4",
1440
      "\xd5\xa3"         => "\xd4\xb3",
1441
      "\xd5\xa2"         => "\xd4\xb2",
1442
      "\xd5\xa1"         => "\xd4\xb1",
1443
      "\xd4\xa5"         => "\xd4\xa4",
1444
      "\xd4\xa3"         => "\xd4\xa2",
1445
      "\xd4\xa1"         => "\xd4\xa0",
1446
      "\xd4\x9f"         => "\xd4\x9e",
1447
      "\xd4\x9d"         => "\xd4\x9c",
1448
      "\xd4\x9b"         => "\xd4\x9a",
1449
      "\xd4\x99"         => "\xd4\x98",
1450
      "\xd4\x97"         => "\xd4\x96",
1451
      "\xd4\x95"         => "\xd4\x94",
1452
      "\xd4\x93"         => "\xd4\x92",
1453
      "\xd4\x91"         => "\xd4\x90",
1454
      "\xd4\x8f"         => "\xd4\x8e",
1455
      "\xd4\x8d"         => "\xd4\x8c",
1456
      "\xd4\x8b"         => "\xd4\x8a",
1457
      "\xd4\x89"         => "\xd4\x88",
1458
      "\xd4\x87"         => "\xd4\x86",
1459
      "\xd4\x85"         => "\xd4\x84",
1460
      "\xd4\x83"         => "\xd4\x82",
1461
      "\xd4\x81"         => "\xd4\x80",
1462
      "\xd3\xbf"         => "\xd3\xbe",
1463
      "\xd3\xbd"         => "\xd3\xbc",
1464
      "\xd3\xbb"         => "\xd3\xba",
1465
      "\xd3\xb9"         => "\xd3\xb8",
1466
      "\xd3\xb7"         => "\xd3\xb6",
1467
      "\xd3\xb5"         => "\xd3\xb4",
1468
      "\xd3\xb3"         => "\xd3\xb2",
1469
      "\xd3\xb1"         => "\xd3\xb0",
1470
      "\xd3\xaf"         => "\xd3\xae",
1471
      "\xd3\xad"         => "\xd3\xac",
1472
      "\xd3\xab"         => "\xd3\xaa",
1473
      "\xd3\xa9"         => "\xd3\xa8",
1474
      "\xd3\xa7"         => "\xd3\xa6",
1475
      "\xd3\xa5"         => "\xd3\xa4",
1476
      "\xd3\xa3"         => "\xd3\xa2",
1477
      "\xd3\xa1"         => "\xd3\xa0",
1478
      "\xd3\x9f"         => "\xd3\x9e",
1479
      "\xd3\x9d"         => "\xd3\x9c",
1480
      "\xd3\x9b"         => "\xd3\x9a",
1481
      "\xd3\x99"         => "\xd3\x98",
1482
      "\xd3\x97"         => "\xd3\x96",
1483
      "\xd3\x95"         => "\xd3\x94",
1484
      "\xd3\x93"         => "\xd3\x92",
1485
      "\xd3\x91"         => "\xd3\x90",
1486
      "\xd3\x8f"         => "\xd3\x80",
1487
      "\xd3\x8e"         => "\xd3\x8d",
1488
      "\xd3\x8c"         => "\xd3\x8b",
1489
      "\xd3\x8a"         => "\xd3\x89",
1490
      "\xd3\x88"         => "\xd3\x87",
1491
      "\xd3\x86"         => "\xd3\x85",
1492
      "\xd3\x84"         => "\xd3\x83",
1493
      "\xd3\x82"         => "\xd3\x81",
1494
      "\xd2\xbf"         => "\xd2\xbe",
1495
      "\xd2\xbd"         => "\xd2\xbc",
1496
      "\xd2\xbb"         => "\xd2\xba",
1497
      "\xd2\xb9"         => "\xd2\xb8",
1498
      "\xd2\xb7"         => "\xd2\xb6",
1499
      "\xd2\xb5"         => "\xd2\xb4",
1500
      "\xd2\xb3"         => "\xd2\xb2",
1501
      "\xd2\xb1"         => "\xd2\xb0",
1502
      "\xd2\xaf"         => "\xd2\xae",
1503
      "\xd2\xad"         => "\xd2\xac",
1504
      "\xd2\xab"         => "\xd2\xaa",
1505
      "\xd2\xa9"         => "\xd2\xa8",
1506
      "\xd2\xa7"         => "\xd2\xa6",
1507
      "\xd2\xa5"         => "\xd2\xa4",
1508
      "\xd2\xa3"         => "\xd2\xa2",
1509
      "\xd2\xa1"         => "\xd2\xa0",
1510
      "\xd2\x9f"         => "\xd2\x9e",
1511
      "\xd2\x9d"         => "\xd2\x9c",
1512
      "\xd2\x9b"         => "\xd2\x9a",
1513
      "\xd2\x99"         => "\xd2\x98",
1514
      "\xd2\x97"         => "\xd2\x96",
1515
      "\xd2\x95"         => "\xd2\x94",
1516
      "\xd2\x93"         => "\xd2\x92",
1517
      "\xd2\x91"         => "\xd2\x90",
1518
      "\xd2\x8f"         => "\xd2\x8e",
1519
      "\xd2\x8d"         => "\xd2\x8c",
1520
      "\xd2\x8b"         => "\xd2\x8a",
1521
      "\xd2\x81"         => "\xd2\x80",
1522
      "\xd1\xbf"         => "\xd1\xbe",
1523
      "\xd1\xbd"         => "\xd1\xbc",
1524
      "\xd1\xbb"         => "\xd1\xba",
1525
      "\xd1\xb9"         => "\xd1\xb8",
1526
      "\xd1\xb7"         => "\xd1\xb6",
1527
      "\xd1\xb5"         => "\xd1\xb4",
1528
      "\xd1\xb3"         => "\xd1\xb2",
1529
      "\xd1\xb1"         => "\xd1\xb0",
1530
      "\xd1\xaf"         => "\xd1\xae",
1531
      "\xd1\xad"         => "\xd1\xac",
1532
      "\xd1\xab"         => "\xd1\xaa",
1533
      "\xd1\xa9"         => "\xd1\xa8",
1534
      "\xd1\xa7"         => "\xd1\xa6",
1535
      "\xd1\xa5"         => "\xd1\xa4",
1536
      "\xd1\xa3"         => "\xd1\xa2",
1537
      "\xd1\xa1"         => "\xd1\xa0",
1538
      "\xd1\x9f"         => "\xd0\x8f",
1539
      "\xd1\x9e"         => "\xd0\x8e",
1540
      "\xd1\x9d"         => "\xd0\x8d",
1541
      "\xd1\x9c"         => "\xd0\x8c",
1542
      "\xd1\x9b"         => "\xd0\x8b",
1543
      "\xd1\x9a"         => "\xd0\x8a",
1544
      "\xd1\x99"         => "\xd0\x89",
1545
      "\xd1\x98"         => "\xd0\x88",
1546
      "\xd1\x97"         => "\xd0\x87",
1547
      "\xd1\x96"         => "\xd0\x86",
1548
      "\xd1\x95"         => "\xd0\x85",
1549
      "\xd1\x94"         => "\xd0\x84",
1550
      "\xd1\x93"         => "\xd0\x83",
1551
      "\xd1\x92"         => "\xd0\x82",
1552
      "\xd1\x91"         => "\xd0\x81",
1553
      "\xd1\x90"         => "\xd0\x80",
1554
      "\xd1\x8f"         => "\xd0\xaf",
1555
      "\xd1\x8e"         => "\xd0\xae",
1556
      "\xd1\x8d"         => "\xd0\xad",
1557
      "\xd1\x8c"         => "\xd0\xac",
1558
      "\xd1\x8b"         => "\xd0\xab",
1559
      "\xd1\x8a"         => "\xd0\xaa",
1560
      "\xd1\x89"         => "\xd0\xa9",
1561
      "\xd1\x88"         => "\xd0\xa8",
1562
      "\xd1\x87"         => "\xd0\xa7",
1563
      "\xd1\x86"         => "\xd0\xa6",
1564
      "\xd1\x85"         => "\xd0\xa5",
1565
      "\xd1\x84"         => "\xd0\xa4",
1566
      "\xd1\x83"         => "\xd0\xa3",
1567
      "\xd1\x82"         => "\xd0\xa2",
1568
      "\xd1\x81"         => "\xd0\xa1",
1569
      "\xd1\x80"         => "\xd0\xa0",
1570
      "\xd0\xbf"         => "\xd0\x9f",
1571
      "\xd0\xbe"         => "\xd0\x9e",
1572
      "\xd0\xbd"         => "\xd0\x9d",
1573
      "\xd0\xbc"         => "\xd0\x9c",
1574
      "\xd0\xbb"         => "\xd0\x9b",
1575
      "\xd0\xba"         => "\xd0\x9a",
1576
      "\xd0\xb9"         => "\xd0\x99",
1577
      "\xd0\xb8"         => "\xd0\x98",
1578
      "\xd0\xb7"         => "\xd0\x97",
1579
      "\xd0\xb6"         => "\xd0\x96",
1580
      "\xd0\xb5"         => "\xd0\x95",
1581
      "\xd0\xb4"         => "\xd0\x94",
1582
      "\xd0\xb3"         => "\xd0\x93",
1583
      "\xd0\xb2"         => "\xd0\x92",
1584
      "\xd0\xb1"         => "\xd0\x91",
1585
      "\xd0\xb0"         => "\xd0\x90",
1586
      "\xcf\xbb"         => "\xcf\xba",
1587
      "\xcf\xb8"         => "\xcf\xb7",
1588
      "\xcf\xb5"         => "\xce\x95",
1589
      "\xcf\xb2"         => "\xcf\xb9",
1590
      "\xcf\xb1"         => "\xce\xa1",
1591
      "\xcf\xb0"         => "\xce\x9a",
1592
      "\xcf\xaf"         => "\xcf\xae",
1593
      "\xcf\xad"         => "\xcf\xac",
1594
      "\xcf\xab"         => "\xcf\xaa",
1595
      "\xcf\xa9"         => "\xcf\xa8",
1596
      "\xcf\xa7"         => "\xcf\xa6",
1597
      "\xcf\xa5"         => "\xcf\xa4",
1598
      "\xcf\xa3"         => "\xcf\xa2",
1599
      "\xcf\xa1"         => "\xcf\xa0",
1600
      "\xcf\x9f"         => "\xcf\x9e",
1601
      "\xcf\x9d"         => "\xcf\x9c",
1602
      "\xcf\x9b"         => "\xcf\x9a",
1603
      "\xcf\x99"         => "\xcf\x98",
1604
      "\xcf\x97"         => "\xcf\x8f",
1605
      "\xcf\x96"         => "\xce\xa0",
1606
      "\xcf\x95"         => "\xce\xa6",
1607
      "\xcf\x91"         => "\xce\x98",
1608
      "\xcf\x90"         => "\xce\x92",
1609
      "\xcf\x8e"         => "\xce\x8f",
1610
      "\xcf\x8d"         => "\xce\x8e",
1611
      "\xcf\x8c"         => "\xce\x8c",
1612
      "\xcf\x8b"         => "\xce\xab",
1613
      "\xcf\x8a"         => "\xce\xaa",
1614
      "\xcf\x89"         => "\xce\xa9",
1615
      "\xcf\x88"         => "\xce\xa8",
1616
      "\xcf\x87"         => "\xce\xa7",
1617
      "\xcf\x86"         => "\xce\xa6",
1618
      "\xcf\x85"         => "\xce\xa5",
1619
      "\xcf\x84"         => "\xce\xa4",
1620
      "\xcf\x83"         => "\xce\xa3",
1621
      "\xcf\x82"         => "\xce\xa3",
1622
      "\xcf\x81"         => "\xce\xa1",
1623
      "\xcf\x80"         => "\xce\xa0",
1624
      "\xce\xbf"         => "\xce\x9f",
1625
      "\xce\xbe"         => "\xce\x9e",
1626
      "\xce\xbd"         => "\xce\x9d",
1627
      "\xce\xbc"         => "\xce\x9c",
1628
      "\xce\xbb"         => "\xce\x9b",
1629
      "\xce\xba"         => "\xce\x9a",
1630
      "\xce\xb9"         => "\xce\x99",
1631
      "\xce\xb8"         => "\xce\x98",
1632
      "\xce\xb7"         => "\xce\x97",
1633
      "\xce\xb6"         => "\xce\x96",
1634
      "\xce\xb5"         => "\xce\x95",
1635
      "\xce\xb4"         => "\xce\x94",
1636
      "\xce\xb3"         => "\xce\x93",
1637
      "\xce\xb2"         => "\xce\x92",
1638
      "\xce\xb1"         => "\xce\x91",
1639
      "\xce\xaf"         => "\xce\x8a",
1640
      "\xce\xae"         => "\xce\x89",
1641
      "\xce\xad"         => "\xce\x88",
1642
      "\xce\xac"         => "\xce\x86",
1643
      "\xcd\xbd"         => "\xcf\xbf",
1644
      "\xcd\xbc"         => "\xcf\xbe",
1645
      "\xcd\xbb"         => "\xcf\xbd",
1646
      "\xcd\xb7"         => "\xcd\xb6",
1647
      "\xcd\xb3"         => "\xcd\xb2",
1648
      "\xcd\xb1"         => "\xcd\xb0",
1649
      "\xca\x92"         => "\xc6\xb7",
1650
      "\xca\x8c"         => "\xc9\x85",
1651
      "\xca\x8b"         => "\xc6\xb2",
1652
      "\xca\x8a"         => "\xc6\xb1",
1653
      "\xca\x89"         => "\xc9\x84",
1654
      "\xca\x88"         => "\xc6\xae",
1655
      "\xca\x83"         => "\xc6\xa9",
1656
      "\xca\x80"         => "\xc6\xa6",
1657
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1658
      "\xc9\xb5"         => "\xc6\x9f",
1659
      "\xc9\xb2"         => "\xc6\x9d",
1660
      "\xc9\xb1"         => "\xe2\xb1\xae",
1661
      "\xc9\xaf"         => "\xc6\x9c",
1662
      "\xc9\xab"         => "\xe2\xb1\xa2",
1663
      "\xc9\xa9"         => "\xc6\x96",
1664
      "\xc9\xa8"         => "\xc6\x97",
1665
      "\xc9\xa5"         => "\xea\x9e\x8d",
1666
      "\xc9\xa3"         => "\xc6\x94",
1667
      "\xc9\xa0"         => "\xc6\x93",
1668
      "\xc9\x9b"         => "\xc6\x90",
1669
      "\xc9\x99"         => "\xc6\x8f",
1670
      "\xc9\x97"         => "\xc6\x8a",
1671
      "\xc9\x96"         => "\xc6\x89",
1672
      "\xc9\x94"         => "\xc6\x86",
1673
      "\xc9\x93"         => "\xc6\x81",
1674
      "\xc9\x92"         => "\xe2\xb1\xb0",
1675
      "\xc9\x91"         => "\xe2\xb1\xad",
1676
      "\xc9\x90"         => "\xe2\xb1\xaf",
1677
      "\xc9\x8f"         => "\xc9\x8e",
1678
      "\xc9\x8d"         => "\xc9\x8c",
1679
      "\xc9\x8b"         => "\xc9\x8a",
1680
      "\xc9\x89"         => "\xc9\x88",
1681
      "\xc9\x87"         => "\xc9\x86",
1682
      "\xc9\x82"         => "\xc9\x81",
1683
      "\xc9\x80"         => "\xe2\xb1\xbf",
1684
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1685
      "\xc8\xbc"         => "\xc8\xbb",
1686
      "\xc8\xb3"         => "\xc8\xb2",
1687
      "\xc8\xb1"         => "\xc8\xb0",
1688
      "\xc8\xaf"         => "\xc8\xae",
1689
      "\xc8\xad"         => "\xc8\xac",
1690
      "\xc8\xab"         => "\xc8\xaa",
1691
      "\xc8\xa9"         => "\xc8\xa8",
1692
      "\xc8\xa7"         => "\xc8\xa6",
1693
      "\xc8\xa5"         => "\xc8\xa4",
1694
      "\xc8\xa3"         => "\xc8\xa2",
1695
      "\xc8\x9f"         => "\xc8\x9e",
1696
      "\xc8\x9d"         => "\xc8\x9c",
1697
      "\xc8\x9b"         => "\xc8\x9a",
1698
      "\xc8\x99"         => "\xc8\x98",
1699
      "\xc8\x97"         => "\xc8\x96",
1700
      "\xc8\x95"         => "\xc8\x94",
1701
      "\xc8\x93"         => "\xc8\x92",
1702
      "\xc8\x91"         => "\xc8\x90",
1703
      "\xc8\x8f"         => "\xc8\x8e",
1704
      "\xc8\x8d"         => "\xc8\x8c",
1705
      "\xc8\x8b"         => "\xc8\x8a",
1706
      "\xc8\x89"         => "\xc8\x88",
1707
      "\xc8\x87"         => "\xc8\x86",
1708
      "\xc8\x85"         => "\xc8\x84",
1709
      "\xc8\x83"         => "\xc8\x82",
1710
      "\xc8\x81"         => "\xc8\x80",
1711
      "\xc7\xbf"         => "\xc7\xbe",
1712
      "\xc7\xbd"         => "\xc7\xbc",
1713
      "\xc7\xbb"         => "\xc7\xba",
1714
      "\xc7\xb9"         => "\xc7\xb8",
1715
      "\xc7\xb5"         => "\xc7\xb4",
1716
      "\xc7\xb3"         => "\xc7\xb2",
1717
      "\xc7\xaf"         => "\xc7\xae",
1718
      "\xc7\xad"         => "\xc7\xac",
1719
      "\xc7\xab"         => "\xc7\xaa",
1720
      "\xc7\xa9"         => "\xc7\xa8",
1721
      "\xc7\xa7"         => "\xc7\xa6",
1722
      "\xc7\xa5"         => "\xc7\xa4",
1723
      "\xc7\xa3"         => "\xc7\xa2",
1724
      "\xc7\xa1"         => "\xc7\xa0",
1725
      "\xc7\x9f"         => "\xc7\x9e",
1726
      "\xc7\x9d"         => "\xc6\x8e",
1727
      "\xc7\x9c"         => "\xc7\x9b",
1728
      "\xc7\x9a"         => "\xc7\x99",
1729
      "\xc7\x98"         => "\xc7\x97",
1730
      "\xc7\x96"         => "\xc7\x95",
1731
      "\xc7\x94"         => "\xc7\x93",
1732
      "\xc7\x92"         => "\xc7\x91",
1733
      "\xc7\x90"         => "\xc7\x8f",
1734
      "\xc7\x8e"         => "\xc7\x8d",
1735
      "\xc7\x8c"         => "\xc7\x8b",
1736
      "\xc7\x89"         => "\xc7\x88",
1737
      "\xc7\x86"         => "\xc7\x85",
1738
      "\xc6\xbf"         => "\xc7\xb7",
1739
      "\xc6\xbd"         => "\xc6\xbc",
1740
      "\xc6\xb9"         => "\xc6\xb8",
1741
      "\xc6\xb6"         => "\xc6\xb5",
1742
      "\xc6\xb4"         => "\xc6\xb3",
1743
      "\xc6\xb0"         => "\xc6\xaf",
1744
      "\xc6\xad"         => "\xc6\xac",
1745
      "\xc6\xa8"         => "\xc6\xa7",
1746
      "\xc6\xa5"         => "\xc6\xa4",
1747
      "\xc6\xa3"         => "\xc6\xa2",
1748
      "\xc6\xa1"         => "\xc6\xa0",
1749
      "\xc6\x9e"         => "\xc8\xa0",
1750
      "\xc6\x9a"         => "\xc8\xbd",
1751
      "\xc6\x99"         => "\xc6\x98",
1752
      "\xc6\x95"         => "\xc7\xb6",
1753
      "\xc6\x92"         => "\xc6\x91",
1754
      "\xc6\x8c"         => "\xc6\x8b",
1755
      "\xc6\x88"         => "\xc6\x87",
1756
      "\xc6\x85"         => "\xc6\x84",
1757
      "\xc6\x83"         => "\xc6\x82",
1758
      "\xc6\x80"         => "\xc9\x83",
1759
      "\xc5\xbf"         => "\x53",
1760
      "\xc5\xbe"         => "\xc5\xbd",
1761
      "\xc5\xbc"         => "\xc5\xbb",
1762
      "\xc5\xba"         => "\xc5\xb9",
1763
      "\xc5\xb7"         => "\xc5\xb6",
1764
      "\xc5\xb5"         => "\xc5\xb4",
1765
      "\xc5\xb3"         => "\xc5\xb2",
1766
      "\xc5\xb1"         => "\xc5\xb0",
1767
      "\xc5\xaf"         => "\xc5\xae",
1768
      "\xc5\xad"         => "\xc5\xac",
1769
      "\xc5\xab"         => "\xc5\xaa",
1770
      "\xc5\xa9"         => "\xc5\xa8",
1771
      "\xc5\xa7"         => "\xc5\xa6",
1772
      "\xc5\xa5"         => "\xc5\xa4",
1773
      "\xc5\xa3"         => "\xc5\xa2",
1774
      "\xc5\xa1"         => "\xc5\xa0",
1775
      "\xc5\x9f"         => "\xc5\x9e",
1776
      "\xc5\x9d"         => "\xc5\x9c",
1777
      "\xc5\x9b"         => "\xc5\x9a",
1778
      "\xc5\x99"         => "\xc5\x98",
1779
      "\xc5\x97"         => "\xc5\x96",
1780
      "\xc5\x95"         => "\xc5\x94",
1781
      "\xc5\x93"         => "\xc5\x92",
1782
      "\xc5\x91"         => "\xc5\x90",
1783
      "\xc5\x8f"         => "\xc5\x8e",
1784
      "\xc5\x8d"         => "\xc5\x8c",
1785
      "\xc5\x8b"         => "\xc5\x8a",
1786
      "\xc5\x88"         => "\xc5\x87",
1787
      "\xc5\x86"         => "\xc5\x85",
1788
      "\xc5\x84"         => "\xc5\x83",
1789
      "\xc5\x82"         => "\xc5\x81",
1790
      "\xc5\x80"         => "\xc4\xbf",
1791
      "\xc4\xbe"         => "\xc4\xbd",
1792
      "\xc4\xbc"         => "\xc4\xbb",
1793
      "\xc4\xba"         => "\xc4\xb9",
1794
      "\xc4\xb7"         => "\xc4\xb6",
1795
      "\xc4\xb5"         => "\xc4\xb4",
1796
      "\xc4\xb3"         => "\xc4\xb2",
1797
      "\xc4\xb1"         => "\x49",
1798
      "\xc4\xaf"         => "\xc4\xae",
1799
      "\xc4\xad"         => "\xc4\xac",
1800
      "\xc4\xab"         => "\xc4\xaa",
1801
      "\xc4\xa9"         => "\xc4\xa8",
1802
      "\xc4\xa7"         => "\xc4\xa6",
1803
      "\xc4\xa5"         => "\xc4\xa4",
1804
      "\xc4\xa3"         => "\xc4\xa2",
1805
      "\xc4\xa1"         => "\xc4\xa0",
1806
      "\xc4\x9f"         => "\xc4\x9e",
1807
      "\xc4\x9d"         => "\xc4\x9c",
1808
      "\xc4\x9b"         => "\xc4\x9a",
1809
      "\xc4\x99"         => "\xc4\x98",
1810
      "\xc4\x97"         => "\xc4\x96",
1811
      "\xc4\x95"         => "\xc4\x94",
1812
      "\xc4\x93"         => "\xc4\x92",
1813
      "\xc4\x91"         => "\xc4\x90",
1814
      "\xc4\x8f"         => "\xc4\x8e",
1815
      "\xc4\x8d"         => "\xc4\x8c",
1816
      "\xc4\x8b"         => "\xc4\x8a",
1817
      "\xc4\x89"         => "\xc4\x88",
1818
      "\xc4\x87"         => "\xc4\x86",
1819
      "\xc4\x85"         => "\xc4\x84",
1820
      "\xc4\x83"         => "\xc4\x82",
1821
      "\xc4\x81"         => "\xc4\x80",
1822
      "\xc3\xbf"         => "\xc5\xb8",
1823
      "\xc3\xbe"         => "\xc3\x9e",
1824
      "\xc3\xbd"         => "\xc3\x9d",
1825
      "\xc3\xbc"         => "\xc3\x9c",
1826
      "\xc3\xbb"         => "\xc3\x9b",
1827
      "\xc3\xba"         => "\xc3\x9a",
1828
      "\xc3\xb9"         => "\xc3\x99",
1829
      "\xc3\xb8"         => "\xc3\x98",
1830
      "\xc3\xb6"         => "\xc3\x96",
1831
      "\xc3\xb5"         => "\xc3\x95",
1832
      "\xc3\xb4"         => "\xc3\x94",
1833
      "\xc3\xb3"         => "\xc3\x93",
1834
      "\xc3\xb2"         => "\xc3\x92",
1835
      "\xc3\xb1"         => "\xc3\x91",
1836
      "\xc3\xb0"         => "\xc3\x90",
1837
      "\xc3\xaf"         => "\xc3\x8f",
1838
      "\xc3\xae"         => "\xc3\x8e",
1839
      "\xc3\xad"         => "\xc3\x8d",
1840
      "\xc3\xac"         => "\xc3\x8c",
1841
      "\xc3\xab"         => "\xc3\x8b",
1842
      "\xc3\xaa"         => "\xc3\x8a",
1843
      "\xc3\xa9"         => "\xc3\x89",
1844
      "\xc3\xa8"         => "\xc3\x88",
1845
      "\xc3\xa7"         => "\xc3\x87",
1846
      "\xc3\xa6"         => "\xc3\x86",
1847
      "\xc3\xa5"         => "\xc3\x85",
1848
      "\xc3\xa4"         => "\xc3\x84",
1849
      "\xc3\xa3"         => "\xc3\x83",
1850
      "\xc3\xa2"         => "\xc3\x82",
1851
      "\xc3\xa1"         => "\xc3\x81",
1852
      "\xc3\xa0"         => "\xc3\x80",
1853
      "\xc2\xb5"         => "\xce\x9c",
1854
      "\x7a"             => "\x5a",
1855
      "\x79"             => "\x59",
1856
      "\x78"             => "\x58",
1857
      "\x77"             => "\x57",
1858
      "\x76"             => "\x56",
1859
      "\x75"             => "\x55",
1860
      "\x74"             => "\x54",
1861 157
      "\x73"             => "\x53",
1862
      "\x72"             => "\x52",
1863 157
      "\x71"             => "\x51",
1864
      "\x70"             => "\x50",
1865 1
      "\x6f"             => "\x4f",
1866 1
      "\x6e"             => "\x4e",
1867 1
      "\x6d"             => "\x4d",
1868 1
      "\x6c"             => "\x4c",
1869 1
      "\x6b"             => "\x4b",
1870 157
      "\x6a"             => "\x4a",
1871
      "\x69"             => "\x49",
1872
      "\x68"             => "\x48",
1873
      "\x67"             => "\x47",
1874
      "\x66"             => "\x46",
1875
      "\x65"             => "\x45",
1876
      "\x64"             => "\x44",
1877
      "\x63"             => "\x43",
1878
      "\x62"             => "\x42",
1879 8
      "\x61"             => "\x41",
1880
1881 8
    );
1882
1883 8
    return $case;
1884
  }
1885
1886
  /**
   * Probes the server environment for UTF-8 related capabilities and stores
   * the results in self::$support.
   *
   * The probing is skipped when self::$support['mbstring'] is already set,
   * so repeated calls are cheap.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the 'mbstring' entry doubles as the "already probed" marker
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1902 1
1903
  /**
   * Builds the UTF-8 encoded character that belongs to a code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * When the "intlChar" support flag is set, the work is delegated to
   * \IntlChar::chr(). Otherwise a value that is not already an integer is
   * passed through UTF8::hex_to_int() and the character is produced by
   * decoding the numeric HTML entity "&#<n>;".
   *
   * NOTE(review): on the fallback path a falsy number (0 or a failed
   * hex_to_int() conversion) yields null, so code point 0 is reported as a
   * failure there - confirm whether that is intended.
   *
   * @param    int $code_point The code point for which to generate a character.
   *
   * @return   string|null Multi-Byte character, returns null on failure to encode.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $number = (int)$code_point;

    if (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // anything that is not a plain integer is handed to the hex parser
    if ($number !== $code_point) {
      $number = self::hex_to_int($code_point);
    }

    if (!$number) {
      return null;
    }

    return self::html_entity_decode('&#' . $number . ';', ENT_QUOTES);
  }
1932
1933
  /**
   * Runs a callback on every character of a string.
   *
   * The string is broken up with UTF8::split() and the pieces are passed,
   * one by one, to the callback via array_map().
   *
   * @param  string|array $callback The callback function.
   * @param  string       $str      UTF-8 string to run callback on.
   *
   * @return array The values returned by the callback, one per character.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1947
1948 2
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * The string is split with UTF8::split() and strlen() is applied to each
   * resulting piece.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character,
   *                 an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // test for "no first byte" instead of truthiness, otherwise the
    // non-empty string "0" would wrongly be reported as empty
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1968
1969
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are consumed (1 to 4); the marker
   * bits of the lead byte are cleared and every following byte contributes
   * six more bits to the result.
   *
   * @param   string $char The input character
   *
   * @return  int
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;
    $code = self::ord($char[0]);

    // 0xxxxxxx
    if (($code & 0x80) === 0) {
      return $code;
    }

    // sequence length => array(mask, expected marker bits of the lead byte)
    $leadBytes = array(
        2 => array(0xe0, 0xc0), // 110xxxxx
        3 => array(0xf0, 0xe0), // 1110xxxx
        4 => array(0xf8, 0xf0), // 11110xxx
    );

    // stays 1 when the lead byte matches none of the patterns
    $length = 1;
    foreach ($leadBytes as $candidate => $pattern) {
      if (($code & $pattern[0]) === $pattern[1]) {
        $length = $candidate;
        $code &= ~$pattern[1];
        break;
      }
    }

    for ($offset = 1; $offset < $length; ++$offset) {
      // 10xxxxxx
      $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $code;
  }
2008
2009 35
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The character is converted with UTF8::ord() and the result is formatted
   * by UTF8::int_to_hex().
   *
   * @param    string $char The input character
   * @param    string $pfix Prefix handed to UTF8::int_to_hex(), "U+" by default.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
2021
2022
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from UTF8::split() and are glued together with implode(),
   * so $end is placed between the chunks only - it is not appended after the
   * last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
2035 1
2036 1
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, in this order: drop bytes that are not part of a UTF-8 shaped
   * sequence, remove the diamond question mark, remove invisible characters,
   * then apply the optional whitespace / MS Word / BOM handling.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to run UTF8::removeBOM() on the result
   * @param bool   $normalize_whitespace    set true, to run UTF8::normalize_whitespace()
   * @param bool   $normalize_msword        set true, to run UTF8::normalize_msword(), e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences, the other two groups
    // match single stray bytes; replacing with '$1' keeps only group 1
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark(
            preg_replace($utf8Pattern, '$1', $str),
            ''
        )
    );

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
2082 3
2083
  /**
   * Cleans up a string: repairs simple UTF-8 encoding errors first and then
   * runs it through UTF8::clean().
   *
   * @param string $str
   *
   * @return string The cleaned string, '' for empty input.
   */
  public static function cleanup($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($input);

    // clean() is asked to:
    // - remove all none UTF-8 symbols
    // - remove diamond question mark (�)
    // - remove invisible characters (e.g. "\0")
    // - remove BOM
    // - normalize whitespace chars (but keep non-breaking-spaces)
    // MS Word normalization stays switched off.
    return (string)self::clean($repaired, true, true, false, true);
  }
2110
2111
  /**
   * Accepts a string or a array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string is first broken up with UTF8::split(); an array is used as given.
   * Every element is then mapped through UTF8::ord() and, for the U+xxxx
   * style, additionally through UTF8::int_to_hex().
   *
   * @param    string|string[] $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool            $u_style If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $codePoints = array_map(array(__CLASS__, 'ord'), $chars);

    if (!$u_style) {
      return $codePoints;
    }

    return array_map(array(__CLASS__, 'int_to_hex'), $codePoints);
  }
2148
2149
  /**
   * Counts how often each character occurs in a string.
   *
   * The string is split into single characters with UTF8::split() and the
   * pieces are tallied with array_count_values().
   *
   * @param    string $str       The input string.
   * @param    bool   $cleanUtf8 Passed on to UTF8::split(): clean non UTF-8 chars from the string.
   *
   * @return   array An associative array of Character as keys and
   *           their count as values.
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
2162
2163 1
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity ("&#x...;") which
   * mb_convert_encoding() then turns from 'HTML-ENTITIES' into 'UTF-8'.
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2180
2181
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  In contrast to "UTF8::utf8_encode()" the targets 'UTF-8' and 'ISO-8859-1' are routed
   *        through UTF8::to_utf8() / UTF8::to_win1252(), which are meant to also cope with broken /
   *        double encoding, so the function can be called on a string that already is UTF-8.
   *
   * The input is returned unchanged when the string or the encoding is empty, when no encoding
   * could be detected, when (without $force) the detected encoding already equals the target, or
   * when mb_convert_encoding() produced a falsy result.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    true: convert even if the detected encoding already equals the target<br />
   *                         false: convert only if the detected encoding differs from the target
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // nothing detected => nothing we could convert from
    if (!$encodingDetected) {
      return $str;
    }

    // already in the requested encoding and no conversion forced
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    self::checkForSupport();

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            ||
            in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            ||
            in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_win1252($str);
    }

    $converted = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // fall back to the untouched input when the conversion gave nothing usable
    return $converted ? $converted : $str;
  }
2255 2
2256 2
  /**
   * Reads entire file into a string, optionally converting the content to clean UTF-8.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read. The value is passed through
   *                                     filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   *                                     </p>
   * @param int|null      $flags         [optional] <p>
   *                                     Forwarded as second argument of the native
   *                                     file_get_contents(): pass FILE_USE_INCLUDE_PATH to search
   *                                     for the file in the include path (see include_path for
   *                                     more information). null, the default, is forwarded as false.
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with
   *                                     stream_context_create. If no context is given and $timeout
   *                                     is non-zero, a context with the "http" option
   *                                     "timeout" => $timeout is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts.
   *                                     null, the default, is forwarded as 0.
   *                                     </p>
   * @param int|null      $maxlen        [optional] <p>
   *                                     Maximum length of data read. The default is to read until end
   *                                     of file is reached. Only integer values are forwarded.
   *                                     </p>
   * @param int           $timeout       Timeout used for the automatically created "http" context,
   *                                     0 disables the automatic context.
   *
   * @param boolean       $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used
   *                                     non default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, which alters file
    // names containing such characters (and the filter is deprecated as of PHP 8.1) - confirm
    // whether this sanitizing is still wanted.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function declares these two parameters as non-nullable (bool / int);
    // handing it null is deprecated since PHP 8.1, so map the null defaults to the
    // values null was coerced to anyway.
    if ($flags === null) {
      $flags = false;
    }

    if ($offset === null) {
      $offset = 0;
    }

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // $force = false: only convert when the detected encoding differs from UTF-8
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
2376
2377
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (also False when the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // an unreadable file cannot be inspected, so it is reported as "no BOM"
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
2388
2389
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized, every other type is returned untouched.
   *
   * @param mixed  $var                The value to filter.
   * @param int    $normalization_form Normalizer form constant (defaults to NFC).
   * @param string $leading_combining  Prefix used when the result starts with a combining mark.
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $item) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($item, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $property => $item) {
        $var->{$property} = self::filter($item, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure 7-bit strings need no normalization at all
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker: the string was already in the requested form
      $n = '-';
    } else {
      $n = \Normalizer::normalize($var, $normalization_form);

      if (isset($n[0])) {
        $var = $n;
      } else {
        // normalization produced nothing usable, so re-encode the input as UTF-8 instead
        $var = self::encode('UTF-8', $var);
      }
    }

    if ($var[0] >= "\x80" && isset($n[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
2441 1
2442 1
  /**
   * Wrapper around the native "filter_input()" whose result is passed through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param mixed  $option
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $argCount = func_num_args();

    // $option is only forwarded when the caller actually passed a fourth argument
    if ($argCount >= 4) {
      $value = filter_input($type, $var, $filter, $option);
    } else {
      $value = filter_input($type, $var, $filter);
    }

    return self::filter($value);
  }
2462
2463
  /**
   * Wrapper around the native "filter_input_array()" whose result is passed through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param int   $type
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    $argCount = func_num_args();

    // definition / add_empty are only forwarded when the caller passed more than the type
    if ($argCount >= 2) {
      $values = filter_input_array($type, $definition, $add_empty);
    } else {
      $values = filter_input_array($type);
    }

    return self::filter($values);
  }
2482 1
2483 1
  /**
   * Wrapper around the native "filter_var()" whose result is passed through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param mixed $var
   * @param int   $filter
   * @param mixed $option
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    $argCount = func_num_args();

    // $option is only forwarded when the caller actually passed a third argument
    if ($argCount >= 3) {
      $value = filter_var($var, $filter, $option);
    } else {
      $value = filter_var($var, $filter);
    }

    return self::filter($value);
  }
2502
2503
  /**
   * Wrapper around the native "filter_var_array()" whose result is passed through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * @param array $data
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    $argCount = func_num_args();

    // definition / add_empty are only forwarded when the caller passed more than the data
    if ($argCount >= 2) {
      $values = filter_var_array($data, $definition, $add_empty);
    } else {
      $values = filter_var_array($data);
    }

    return self::filter($values);
  }
2522
2523 1
  /**
   * Check that the number of unicode characters is not more than the specified integer.
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The size in number of chars to be checked against string.
   *
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
2535 1
2536 1
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * Every key of the class' "$brokenUtf8ToUtf8" map found in the input is replaced by its value.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * @param string $str
   *
   * @return string
   */
  public static function fix_simple_utf8($str)
  {
    // search / replace lists are derived from the map once and then reused on later calls
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
      $fixedSequences = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
2563 1
2564 1
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * The decode / re-encode round trip is repeated until the value no longer changes.
   *
   * @param string|string[] $str You can use a string or an array of strings.
   *
   * @return mixed
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      // fix every element on its own, keeping keys and their order
      $fixed = array();
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    $previous = '';
    while ($str !== $previous) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous));
    }

    return $str;
  }
2592
2593
  /**
   * Get the text direction of a specific character.
   *
   * When "IntlChar" support is flagged as available its direction code is consulted first;
   * if that gives no answer (or is unavailable) the decision falls back to a code point table.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction codes, taken from the "IntlChar"-Class
      $ltrCodes = array(0, 11, 12, 20);
      $rtlCodes = array(1, 13, 14, 15, 21);

      if (in_array($direction, $ltrCodes, true)) {
        return 'LTR';
      }

      if (in_array($direction, $rtlCodes, true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // everything outside of 0x5be..0x10b7f is left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      // lower block: single code points ...
      $rtlSingles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      // ... and inclusive [from, to] ranges
      $rtlRanges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      // upper block: single code points ...
      $rtlSingles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      // ... and inclusive [from, to] ranges
      $rtlRanges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two blocks only 0x200f is right-to-left
      return 'LTR';

    }

    // single code points are matched strictly ...
    if (in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    // ... ranges by inclusive bounds
    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2711 1
2712
  /**
   * Load a data file from the "data" directory next to this class: "/data/<file>.php".
   *
   * The file is included via "require", so the result is whatever that file returns.
   *
   * @param string $file File name without directory and without the ".php" extension.
   *
   * @return bool|string|array|int false on error (e.g. the file does not exist)
   */
  protected static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    // is_file() instead of file_exists(): a directory with that name must not be passed to require
    if (!is_file($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2729
2730
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted forms are "U+xxxx", "\uxxxx" and the bare hex digits (4 to 6 digits, case-insensitive).
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int|false The code point, or false on failure.
   */
  public static function hex_to_int($str)
  {
    // only real hex digits are accepted: letters beyond "f" would otherwise be turned into 0 by intval()
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2747
2748
  /**
   * Alias for "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2763
2764
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param  string $str            The Unicode string to be encoded as numbered entities.
   * @param  bool   $keepAsciiChars Keep ASCII chars.
   * @param  string $encoding       Only used when "mb_encode_numericentity()" is available.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded: everything, or only non-ASCII
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      $encoding = self::normalizeEncoding($encoding);

      // convmap = (first code point, last code point, offset, mask)
      // The range goes up to U+10FFFF so that characters above the BMP are encoded too;
      // the mask keeps all 21 bits a code point can occupy.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff,),
          $encoding
      );
    }

    // fallback without mbstring: encode char by char
    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
2803 3
2804 3
  /**
2805 3
   * UTF-8 version of html_entity_decode()
2806
   *
2807
   * The reason we are not using html_entity_decode() by itself is because
2808 3
   * while it is not technically correct to leave out the semicolon
2809
   * at the end of an entity most browsers will still interpret the entity
2810
   * correctly. html_entity_decode() does not convert entities without
2811 15
   * semicolons, so we are left with our own little solution here. Bummer.
2812
   *
2813 15
   * Convert all HTML entities to their applicable characters
2814
   *
2815
   * INFO: opposite to UTF8::html_encode()
2816 15
   *
2817 15
   * @link http://php.net/manual/en/function.html-entity-decode.php
2818 15
   *
2819
   * @param string $str      <p>
2820 15
   *                         The input string.
2821
   *                         </p>
2822 15
   * @param int    $flags    [optional] <p>
2823
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2824 15
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2825
   *                         <table>
2826
   *                         Available <i>flags</i> constants
2827
   *                         <tr valign="top">
2828
   *                         <td>Constant Name</td>
2829
   *                         <td>Description</td>
2830
   *                         </tr>
2831
   *                         <tr valign="top">
2832
   *                         <td><b>ENT_COMPAT</b></td>
2833
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2834 12
   *                         </tr>
2835
   *                         <tr valign="top">
2836 12
   *                         <td><b>ENT_QUOTES</b></td>
2837
   *                         <td>Will convert both double and single quotes.</td>
2838 12
   *                         </tr>
2839
   *                         <tr valign="top">
2840 12
   *                         <td><b>ENT_NOQUOTES</b></td>
2841 5
   *                         <td>Will leave both double and single quotes unconverted.</td>
2842
   *                         </tr>
2843
   *                         <tr valign="top">
2844 11
   *                         <td><b>ENT_HTML401</b></td>
2845
   *                         <td>
2846
   *                         Handle code as HTML 4.01.
2847
   *                         </td>
2848
   *                         </tr>
2849
   *                         <tr valign="top">
2850
   *                         <td><b>ENT_XML1</b></td>
2851
   *                         <td>
2852
   *                         Handle code as XML 1.
2853
   *                         </td>
2854
   *                         </tr>
2855
   *                         <tr valign="top">
2856
   *                         <td><b>ENT_XHTML</b></td>
2857
   *                         <td>
2858
   *                         Handle code as XHTML.
2859
   *                         </td>
2860
   *                         </tr>
2861
   *                         <tr valign="top">
2862
   *                         <td><b>ENT_HTML5</b></td>
2863
   *                         <td>
2864
   *                         Handle code as HTML 5.
2865
   *                         </td>
2866
   *                         </tr>
2867
   *                         </table>
2868
   *                         </p>
2869
   * @param string $encoding [optional] <p>
2870
   *                         Encoding to use.
2871
   *                         </p>
2872
   *
2873
   * @return string the decoded string.
2874
   */
2875
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to decode: empty input or no entity introducer at all
    if ($str === '') {
      return '';
    }

    if (false === strpos($str, '&')) {
      return $str;
    }

    self::checkForSupport();

    $encoding = self::normalizeEncoding($encoding);

    if ($flags === null) {
      // ENT_HTML5 is only used when Bootup reports PHP >= 5.4
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // decodes one decimal entity, but leaves it encoded when it stands for a quote character
    $decodeDecimalEntity = function ($matches) {
      $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // repeat until a pass changes nothing, so nested (multiply encoded) entities are resolved too
    do {
      $previous = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", $decodeDecimalEntity, $str);

      // decode numeric & UTF16 two byte entities
      // (entities lacking the trailing semicolon get one appended first)
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2923
2924
  /**
2925
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2926
   *
2927
   * @link http://php.net/manual/en/function.htmlentities.php
2928
   *
2929
   * @param string $str           <p>
2930
   *                              The input string.
2931
   *                              </p>
2932
   * @param int    $flags         [optional] <p>
2933
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2934
   *                              invalid code unit sequences and the used document type. The default is
2935
   *                              ENT_COMPAT | ENT_HTML401.
2936
   *                              <table>
2937
   *                              Available <i>flags</i> constants
2938
   *                              <tr valign="top">
2939
   *                              <td>Constant Name</td>
2940
   *                              <td>Description</td>
2941
   *                              </tr>
2942
   *                              <tr valign="top">
2943
   *                              <td><b>ENT_COMPAT</b></td>
2944
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2945
   *                              </tr>
2946
   *                              <tr valign="top">
2947
   *                              <td><b>ENT_QUOTES</b></td>
2948
   *                              <td>Will convert both double and single quotes.</td>
2949
   *                              </tr>
2950 2
   *                              <tr valign="top">
2951
   *                              <td><b>ENT_NOQUOTES</b></td>
2952 2
   *                              <td>Will leave both double and single quotes unconverted.</td>
2953
   *                              </tr>
2954
   *                              <tr valign="top">
2955
   *                              <td><b>ENT_IGNORE</b></td>
2956
   *                              <td>
2957
   *                              Silently discard invalid code unit sequences instead of returning
2958
   *                              an empty string. Using this flag is discouraged as it
2959
   *                              may have security implications.
2960
   *                              </td>
2961
   *                              </tr>
2962
   *                              <tr valign="top">
2963
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2964
   *                              <td>
2965
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2966
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2967
   *                              </td>
2968
   *                              </tr>
2969
   *                              <tr valign="top">
2970
   *                              <td><b>ENT_DISALLOWED</b></td>
2971
   *                              <td>
2972
   *                              Replace invalid code points for the given document type with a
2973
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2974
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2975
   *                              instance, to ensure the well-formedness of XML documents with
2976
   *                              embedded external content.
2977
   *                              </td>
2978
   *                              </tr>
2979
   *                              <tr valign="top">
2980
   *                              <td><b>ENT_HTML401</b></td>
2981
   *                              <td>
2982
   *                              Handle code as HTML 4.01.
2983
   *                              </td>
2984
   *                              </tr>
2985
   *                              <tr valign="top">
2986
   *                              <td><b>ENT_XML1</b></td>
2987
   *                              <td>
2988
   *                              Handle code as XML 1.
2989
   *                              </td>
2990
   *                              </tr>
2991
   *                              <tr valign="top">
2992
   *                              <td><b>ENT_XHTML</b></td>
2993
   *                              <td>
2994
   *                              Handle code as XHTML.
2995
   *                              </td>
2996
   *                              </tr>
2997
   *                              <tr valign="top">
2998
   *                              <td><b>ENT_HTML5</b></td>
2999
   *                              <td>
3000
   *                              Handle code as HTML 5.
3001
   *                              </td>
3002
   *                              </tr>
3003
   *                              </table>
3004
   *                              </p>
3005
   * @param string $encoding      [optional] <p>
3006
   *                              Like <b>htmlspecialchars</b>,
3007
   *                              <b>htmlentities</b> takes an optional third argument
3008
   *                              <i>encoding</i> which defines encoding used in
3009
   *                              conversion.
3010
   *                              Although this argument is technically optional, you are highly
3011
   *                              encouraged to specify the correct value for your code.
3012
   *                              </p>
3013
   * @param bool   $double_encode [optional] <p>
3014
   *                              When <i>double_encode</i> is turned off PHP will not
3015
   *                              encode existing html entities. The default is to convert everything.
3016
   *                              </p>
3017
   *
3018
   *
3019
   * @return string the encoded string.
3020
   * </p>
3021
   * <p>
3022
   * If the input <i>string</i> contains an invalid code unit
3023
   * sequence within the given <i>encoding</i> an empty string
3024
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3025
   * <b>ENT_SUBSTITUTE</b> flags are set.
3026
   */
3027
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $encoding = self::normalizeEncoding($encoding);

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // characters of three or more bytes that are still present are replaced by numbered entities
    $byteLengths = self::chr_size_list($str);
    $search = array();
    $replacements = array();
    foreach ($byteLengths as $counter => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $counter);

      // access() can report failure with false: skip instead of using it as key / input
      if ($char === false) {
        continue;
      }

      // each distinct character is encoded only once
      if (!isset($replacements[$char])) {
        $search[$char] = $char;
        $replacements[$char] = self::html_encode($char);
      }
    }

    return str_replace($search, $replacements, $str);
  }
3053
3054
  /**
3055
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3056
   *
3057
   * INFO: Take a look at "UTF8::htmlentities()"
3058
   *
3059
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3060
   *
3061
   * @param string $str           <p>
3062 1
   *                              The string being converted.
3063
   *                              </p>
3064 1
   * @param int    $flags         [optional] <p>
3065
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3066
   *                              invalid code unit sequences and the used document type. The default is
3067
   *                              ENT_COMPAT | ENT_HTML401.
3068
   *                              <table>
3069
   *                              Available <i>flags</i> constants
3070
   *                              <tr valign="top">
3071
   *                              <td>Constant Name</td>
3072 1
   *                              <td>Description</td>
3073
   *                              </tr>
3074 1
   *                              <tr valign="top">
3075
   *                              <td><b>ENT_COMPAT</b></td>
3076
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3077
   *                              </tr>
3078
   *                              <tr valign="top">
3079
   *                              <td><b>ENT_QUOTES</b></td>
3080
   *                              <td>Will convert both double and single quotes.</td>
3081
   *                              </tr>
3082
   *                              <tr valign="top">
3083
   *                              <td><b>ENT_NOQUOTES</b></td>
3084
   *                              <td>Will leave both double and single quotes unconverted.</td>
3085
   *                              </tr>
3086
   *                              <tr valign="top">
3087
   *                              <td><b>ENT_IGNORE</b></td>
3088
   *                              <td>
3089
   *                              Silently discard invalid code unit sequences instead of returning
3090
   *                              an empty string. Using this flag is discouraged as it
3091
   *                              may have security implications.
3092
   *                              </td>
3093
   *                              </tr>
3094
   *                              <tr valign="top">
3095
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3096
   *                              <td>
3097
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3098
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
3099
   *                              </td>
3100
   *                              </tr>
3101
   *                              <tr valign="top">
3102
   *                              <td><b>ENT_DISALLOWED</b></td>
3103 1
   *                              <td>
3104
   *                              Replace invalid code points for the given document type with a
3105 1
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3106
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3107
   *                              instance, to ensure the well-formedness of XML documents with
3108
   *                              embedded external content.
3109
   *                              </td>
3110
   *                              </tr>
3111
   *                              <tr valign="top">
3112
   *                              <td><b>ENT_HTML401</b></td>
3113
   *                              <td>
3114
   *                              Handle code as HTML 4.01.
3115 1
   *                              </td>
3116
   *                              </tr>
3117 1
   *                              <tr valign="top">
3118
   *                              <td><b>ENT_XML1</b></td>
3119
   *                              <td>
3120
   *                              Handle code as XML 1.
3121
   *                              </td>
3122
   *                              </tr>
3123
   *                              <tr valign="top">
3124
   *                              <td><b>ENT_XHTML</b></td>
3125
   *                              <td>
3126
   *                              Handle code as XHTML.
3127 1
   *                              </td>
3128
   *                              </tr>
3129 1
   *                              <tr valign="top">
3130
   *                              <td><b>ENT_HTML5</b></td>
3131
   *                              <td>
3132
   *                              Handle code as HTML 5.
3133
   *                              </td>
3134
   *                              </tr>
3135
   *                              </table>
3136
   *                              </p>
3137
   * @param string $encoding      [optional] <p>
3138
   *                              Defines encoding used in conversion.
3139
   *                              </p>
3140
   *                              <p>
3141
   *                              For the purposes of this function, the encodings
3142
   *                              ISO-8859-1, ISO-8859-15,
3143
   *                              UTF-8, cp866,
3144
   *                              cp1251, cp1252, and
3145
   *                              KOI8-R are effectively equivalent, provided the
3146
   *                              <i>string</i> itself is valid for the encoding, as
3147
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3148
   *                              the same positions in all of these encodings.
3149
   *                              </p>
3150
   * @param bool   $double_encode [optional] <p>
3151
   *                              When <i>double_encode</i> is turned off PHP will not
3152
   *                              encode existing html entities, the default is to convert everything.
3153
   *                              </p>
3154
   *
3155
   * @return string The converted string.
3156
   * </p>
3157
   * <p>
3158
   * If the input <i>string</i> contains an invalid code unit
3159
   * sequence within the given <i>encoding</i> an empty string
3160
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3161
   * <b>ENT_SUBSTITUTE</b> flags are set.
3162
   */
3163
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // normalize the encoding name, then delegate to the native function
    $normalizedEncoding = self::normalizeEncoding($encoding);
    $converted = htmlspecialchars($str, $flags, $normalizedEncoding, $double_encode);

    return $converted;
  }
3169
3170
  /**
3171
   * checks whether iconv is available on the server
3172
   *
3173
   * @return   bool True if available, False otherwise
3174
   */
3175
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean
    return extension_loaded('iconv') === true;
  }
3179 16
3180
  /**
3181 16
   * Converts Integer to hexadecimal U+xxxx code point representation.
3182
   *
3183
   * INFO: opposite to UTF8::hex_to_int()
3184
   *
3185
   * @param    int    $int The integer to be converted to hexadecimal code point.
3186
   * @param    string $pfix
3187
   *
3188
   * @return   string The code point, or empty string on failure.
3189
   */
3190
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // only values whose string form consists purely of digits are accepted
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // left-pad with zeros to at least four hex digits; longer values stay as-is
    $digits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
3202
3203
  /**
3204 1
   * checks whether intl-char is available on the server
3205
   *
3206 1
   * @return   bool True if available, False otherwise
3207
   */
3208 1
  public static function intlChar_loaded()
  {
    // "&&" replaces the low-precedence "and" operator; the returned value is
    // the same, but the expression no longer depends on operator precedence.
    return Bootup::is_php('7.0') === true && class_exists('IntlChar');
  }
3212 1
3213 1
  /**
3214
   * checks whether intl is available on the server
3215 1
   *
3216
   * @return   bool True if available, False otherwise
3217
   */
3218
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean
    return extension_loaded('intl') === true;
  }
3222
3223
  /**
3224
   * alias for "UTF8::is_ascii()"
3225
   *
3226 4
   * @see UTF8::is_ascii()
3227
   *
3228
   * @param string $str
3229 4
   *
3230
   * @return boolean
3231
   */
3232 4
  public static function isAscii($str)
  {
    // pure alias: forwards the argument untouched to is_ascii()
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
3236 4
3237 4
  /**
3238 3
   * alias for "UTF8::is_base64()"
3239
   *
3240 4
   * @see UTF8::is_base64()
3241
   *
3242
   * @param string $str
3243
   *
3244
   * @return bool
3245
   */
3246
  public static function isBase64($str)
  {
    // pure alias: forwards the argument untouched to is_base64()
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
3250
3251
  /**
3252
   * alias for "UTF8::is_binary()"
3253
   *
3254
   * @see UTF8::is_binary()
3255
   *
3256
   * @param string $str
3257
   *
3258
   * @return bool
3259
   */
3260
  public static function isBinary($str)
  {
    // pure alias: forwards the argument untouched to is_binary()
    $isBinary = self::is_binary($str);

    return $isBinary;
  }
3264
3265
  /**
3266
   * alias for "UTF8::is_bom()"
3267
   *
3268
   * @see UTF8::is_bom()
3269
   *
3270
   * @param string $utf8_chr
3271
   *
3272
   * @return boolean
3273 2
   */
3274
  public static function isBom($utf8_chr)
  {
    // pure alias: forwards the argument untouched to is_bom()
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
3278
3279
  /**
3280
   * alias for "UTF8::is_html()"
3281
   *
3282
   * @see UTF8::is_html()
3283
   *
3284
   * @param string $str
3285 2
   *
3286
   * @return boolean
3287 2
   */
3288 2
  public static function isHtml($str)
  {
    // pure alias: forwards the argument untouched to is_html()
    $isHtml = self::is_html($str);

    return $isHtml;
  }
3292 2
3293 2
  /**
3294 2
   * alias for "UTF8::is_json()"
3295 2
   *
3296 2
   * @see UTF8::is_json()
3297 2
   *
3298 2
   * @param string $str
3299 1
   *
3300 1
   * @return bool
3301 2
   */
3302 2
  public static function isJson($str)
  {
    // pure alias: forwards the argument untouched to is_json()
    $isJson = self::is_json($str);

    return $isJson;
  }
3306 2
3307 2
  /**
3308 2
   * alias for "UTF8::is_utf16()"
3309 2
   *
3310 2
   * @see UTF8::is_utf16()
3311 2
   *
3312 2
   * @param string $str
3313 2
   *
3314 1
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3315 1
   */
3316 2
  public static function isUtf16($str)
  {
    // pure alias: forwards the argument untouched to is_utf16()
    $detected = self::is_utf16($str);

    return $detected;
  }
3320 2
3321 1
  /**
3322 1
   * alias for "UTF8::is_utf32()"
3323
   *
3324 1
   * @see UTF8::is_utf32()
3325
   *
3326
   * @param string $str
3327
   *
3328 2
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3329
   */
3330 2
  public static function isUtf32($str)
  {
    // pure alias: forwards the argument untouched to is_utf32()
    $detected = self::is_utf32($str);

    return $detected;
  }
3334
3335
  /**
3336
   * alias for "UTF8::is_utf8()"
3337
   *
3338
   * @see UTF8::is_utf8()
3339
   *
3340 2
   * @param string $str
3341
   * @param  bool  $strict
3342 2
   *
3343 2
   * @return bool
3344
   */
3345 2
  public static function isUtf8($str, $strict = false)
  {
    // pure alias: forwards both arguments untouched to is_utf8()
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
3349 2
3350 2
  /**
3351 2
   * Checks if a string is 7 bit ASCII.
3352 2
   *
3353 2
   * @param    string $str The string to check.
3354
   *
3355
   * @return   bool <strong>true</strong> if it is ASCII<br />
3356 2
   *                <strong>false</strong> otherwise
3357 2
   */
3358 2
  public static function is_ascii($str)
  {
    // look for any byte with the high bit set (0x80-0xFF)
    $highBitFound = preg_match('/[\x80-\xFF]/', $str);

    // no such byte (or no match result at all) => reported as ASCII
    return !$highBitFound;
  }
3362 2
3363 1
  /**
3364 1
   * Returns true if the string is base64 encoded, false otherwise.
3365 1
   *
3366 1
   * @param string $str
3367 1
   *
3368 1
   * @return bool Whether or not $str is base64 encoded
3369
   */
3370
  public static function is_base64($str)
  {
    $candidate = (string)$str;

    // an empty string is never reported as base64
    if ($candidate === '') {
      return false;
    }

    // accepted only if a strict decode followed by a re-encode
    // reproduces the input exactly
    $decoded = base64_decode($candidate, true);

    return base64_encode($decoded) === $candidate;
  }
3384
3385 2
  /**
3386
   * Check if the input is binary... (this looks like a hack).
3387
   *
3388
   * @param mixed $input
3389
   *
3390
   * @return bool
3391
   */
3392
  public static function is_binary($input)
  {
    // input consisting only of the characters "0" and "1" counts as binary
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Otherwise the input is binary exactly when it contains a NUL byte.
    //
    // The former third test,
    //   substr_count($input, '^ -~') / strlen($input) > 0.3
    // was removed as dead code: substr_count() counts the literal four-byte
    // text "^ -~" (it is not a character class), so the ratio can never
    // exceed 0.25 and the test was always false.
    return substr_count($input, "\x00") > 0;
  }
3409
3410
  /**
3411
   * Check if the file is binary.
3412
   *
3413
   * @param string $file
3414
   *
3415 32
   * @return boolean
3416
   */
3417 32
  public static function is_binary_file($file)
  {
    // Only the first 512 bytes of the file are inspected.
    $block = '';

    try {
      // 'rb': read the raw bytes, without any text-mode translation
      $fp = fopen($file, 'rb');

      // fopen() reports failure by returning false, not by throwing, so the
      // handle has to be checked before it is passed to fread() / fclose().
      if ($fp !== false) {
        $data = fread($fp, 512);
        fclose($fp);

        // fread() may also return false on failure
        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // kept for environments whose error handler turns warnings into exceptions
      $block = '';
    }

    return self::is_binary($block);
  }
3429 30
3430 32
  /**
3431
   * Checks if the given string is equal to any "Byte Order Mark".
3432 28
   *
3433 28
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3434 28
   *
3435 28
   * @param    string $str The input string.
3436 30
   *
3437
   * @return   bool True if $str is a Byte Order Mark, False otherwise.
3438 13
   */
3439 13
  public static function is_bom($str)
  {
    // the keys of self::$bom are the BOM byte strings themselves;
    // the comparison is strict, exactly like a "===" against every key
    $knownBoms = array_keys(self::$bom);

    return in_array($str, $knownBoms, true);
  }
3449
3450
  /**
3451
   * Check if the string contains any html-tags <lall>.
3452
   *
3453
   * @param string $str
3454
   *
3455
   * @return boolean
3456
   */
3457 3
  public static function is_html($str)
  {
    $subject = (string)$str;

    // nothing to inspect in an empty string
    if ($subject === '') {
      return false;
    }

    // opening / closing / self-closing tag, with optional attributes
    $tagPattern = "/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/";

    // exactly one successful match is all that is needed
    return preg_match($tagPattern, $subject) === 1;
  }
3476 30
3477
  /**
3478 28
   * Try to check if "$str" is a JSON string.
3479 28
   *
3480 28
   * @param string $str
3481 28
   *
3482
   * @return bool
3483
   */
3484
  public static function is_json($str)
  {
    $candidate = (string)$str;

    // an empty string is never reported as JSON
    if ($candidate === '') {
      return false;
    }

    // only input that decodes to an object, without a decoder error, passes
    $decoded = self::json_decode($candidate);

    return is_object($decoded) && json_last_error() === JSON_ERROR_NONE;
  }
3502
3503
  /**
3504 28
   * Check if the string is UTF-16.
3505 28
   *
3506 28
   * @param string $str
3507 28
   *
3508 28
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3509
   */
3510 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // only input flagged by is_binary() is examined at all
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // one score per byte order; little-endian is evaluated first, then big-endian
    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);
    foreach (array_keys($scores) as $byteOrder) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$decoded) {
        continue;
      }

      // the decoded text must survive a full round trip unchanged
      $reEncoded = \mb_convert_encoding($decoded, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $byteOrder);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // one point for every key of count_chars($roundTrip) that is
      // strictly found among the values of count_chars($str)
      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $candidate => $unused) {
        if (in_array($candidate, $inputChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    // a tie (including 0 : 0) means "not detected"
    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
3559
3560 2
  /**
3561
   * Check if the string is UTF-32.
3562 2
   *
3563 2
   * @param string $str
3564 2
   *
3565
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3566
   */
3567 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // only input flagged by is_binary() is examined at all
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // one score per byte order; little-endian is evaluated first, then big-endian
    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);
    foreach (array_keys($scores) as $byteOrder) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$decoded) {
        continue;
      }

      // the decoded text must survive a full round trip unchanged
      $reEncoded = \mb_convert_encoding($decoded, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $byteOrder);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // one point for every key of count_chars($roundTrip) that is
      // strictly found among the values of count_chars($str)
      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $candidate => $unused) {
        if (in_array($candidate, $inputChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    // a tie (including 0 : 0) means "not detected"
    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
3616
3617 1
  /**
3618
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3619
   *
3620
   * @see    http://hsivonen.iki.fi/php-utf8/
3621
   *
3622
   * @param  string $str    The string to be checked.
3623
   * @param  bool   $strict Check also if the string is not UTF-16 or UTF-32.
3624
   *
3625
   * @return bool
3626
   */
3627 6
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // the empty string is reported as valid
    if (!isset($str[0])) {
      return true;
    }

    // strict mode: anything detected as UTF-16 or UTF-32 is rejected up front
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): the regex path is taken when pcre_utf8_support() is NOT
    // true. If that helper reports availability of the /u modifier, this
    // condition looks inverted - confirm before changing; behavior is kept.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // expected number of octets still to come in the current sequence
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the first octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence: only a continuation octet is legal.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation: merge its six payload bits at the right position.
      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // End of the multi-octet sequence: $mUcs4 holds the final code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // reset for the next character
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state here means the string ended in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3"); that is not valid UTF-8.
    return $mState === 0;
  }
3761
3762
  /**
3763
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3764 2
   * Decodes a JSON string
3765
   *
3766 2
   * @link http://php.net/manual/en/function.json-decode.php
3767 2
   *
3768
   * @param string $json    <p>
3769 2
   *                        The <i>json</i> string being decoded.
3770 1
   *                        </p>
3771 1
   *                        <p>
3772 1
   *                        This function only works with UTF-8 encoded strings.
3773
   *                        </p>
3774 2
   *                        <p>PHP implements a superset of
3775
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3776
   *                        only supports these values when they are nested inside an array or an object.
3777
   *                        </p>
3778
   * @param bool   $assoc   [optional] <p>
3779
   *                        When <b>TRUE</b>, returned objects will be converted into
3780
   *                        associative arrays.
3781
   *                        </p>
3782
   * @param int    $depth   [optional] <p>
3783
   *                        User specified recursion depth.
3784
   *                        </p>
3785
   * @param int    $options [optional] <p>
3786 8
   *                        Bitmask of JSON decode options. Currently only
3787
   *                        <b>JSON_BIGINT_AS_STRING</b>
3788 8
   *                        is supported (default is to cast large integers as floats)
3789 8
   *                        </p>
3790
   *
3791 8
   * @return mixed the value encoded in <i>json</i> in appropriate
3792
   * PHP type. Values true, false and
3793 8
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3794
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3795 2
   * <i>json</i> cannot be decoded or if the encoded
3796
   * data is deeper than the recursion limit.
3797 2
   */
3798
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    // $options is only passed on when Bootup::is_php('5.4') is true
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3810 8
3811 8
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value, the value is passed through
   * "UTF8::filter()" before it is encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource. All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of the JSON_* encode constants, e.g.
   *                       <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>, <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       This value is only passed to the native function if
   *                       "Bootup::is_php('5.5')" reports support for it.
   *                       </p>
   *
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // older PHP versions do not know the third ("$depth") argument
    return Bootup::is_php('5.5')
        ? json_encode($filtered, $options, $depth)
        : json_encode($filtered, $options);
  }
3859
3860
  /**
   * Makes string's first char lowercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string, an empty string for empty input
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // Nothing to do for an empty string. This also avoids handing a failed
    // ("false") substr()-result over to strtolower().
    if (!isset($str[0])) {
      return '';
    }

    // substr() may still report "false" (e.g. for a one-char string and offset 1),
    // so both parts are normalized to strings before they are used
    $firstChar = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $rest;
  }
3871
3872 12
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param  string $str   The string to be trimmed
   * @param  string $chars Optional characters to be stripped, if omitted (or empty)
   *                       all leading separator- and control-characters are stripped
   *
   * @return string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // No usable char-list: strip separators (\pZ) and "other" / control chars (\pC).
    // The string "0" is falsy in PHP, but it is a valid char-list, so it must
    // not end up in this default branch.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    // build a regex character class (or alternation) from the given chars
    $chars = self::rxClass($chars);

    return preg_replace("/^{$chars}+/u", '', $str);
  }
3897 1
3898
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point,
   *                  an empty string if the input contains no characters.
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // the native max() must not be called with an empty list
    // (warning + "false" on older PHP versions, an error on newer ones)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3913
3914
  /**
   * Determines the largest byte-size of a single UTF-8 encoded character
   * within the given string.
   *
   * @param  string $str The original Unicode string.
   *
   * @return int The maximum number of bytes used by one character,
   *             0 if no character sizes are available.
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3931
3932 1
  /**
   * Checks whether the "mbstring"-extension is available on the server;
   * if it is, its internal encoding is switched to UTF-8.
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3947
3948
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param  mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return string The character with the lowest code point,
   *                an empty string if the input contains no characters.
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // the native min() must not be called with an empty list
    // (warning + "false" on older PHP versions, an error on newer ones)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3963
3964
  /**
   * CamelCase-alias for "UTF8::normalize_encoding()", the call is
   * forwarded without any changes.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   *
   * @return string
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3977 7
3978
  /**
   * Normalize the encoding-"name" input.
   *
   * @param  string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string|false e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.,<br />
   *                      false if no encoding-name was given.
   */
  public static function normalize_encoding($encoding)
  {
    static $staticNormalizeEncodingCache = array();

    if (!$encoding) {
      return false;
    }

    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    // Ask the cache first: only names that are not part of the iconv-list are
    // ever stored in it, so this is equivalent to checking it after the
    // (linear) list lookup - but cheaper for repeated calls.
    if (isset($staticNormalizeEncodingCache[$encoding])) {
      return $staticNormalizeEncodingCache[$encoding];
    }

    // already a known encoding-name
    if (in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = strtoupper($encoding);

    // lookup-key: the upper-cased name without punctuation, e.g. "ISO-8859-1" -> "ISO88591"
    $encodingUpperHelper = preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // unknown names are returned upper-cased, but otherwise unchanged
    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    $staticNormalizeEncodingCache[$encodingOrig] = $encoding;

    return $encoding;
  }
4034 1
4035
  /**
   * Replaces some special characters from MS Word with the replacements
   * that are defined in the "$utf8MSWord"-table of this class.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search- and replace-lists are extracted from the table only once per request
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replace, $str);
  }
4054
4055 36
  /**
   * Replaces every whitespace-char from the whitespace-table of this class
   * with a simple space and (by default) removes the bidirectional text chars.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // one prepared whitespace-list per "$keepNonBreakingSpace"-variant
    static $whitespaceLists = array();
    static $bidiControls = null;

    $cacheKey = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceLists[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNonBreakingSpace === true) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceLists[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiControls === null) {
        $bidiControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiControls, '', $str);
    }

    return str_replace($whitespaceLists[$cacheKey], ' ', $str);
  }
4093 36
4094
  /**
   * Format a number with grouped thousands.
   *
   * INFO: multi-byte separators are also supported on PHP versions where the
   *       native function would only use the first byte of a separator.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @deprecated
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // The native function supports separators with more than one byte since
    // PHP 5.4, so the workaround is needed if at least ONE separator is longer
    // than a single byte AND we are running on an older PHP version.
    if (
        (isset($thousands_sep[1]) || isset($dec_point[1]))
        &&
        Bootup::is_php('5.4') !== true
    ) {
      // strtr() with an array replaces everything in one pass, so a separator
      // that was just inserted is never replaced a second time
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
4131 40
4132
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param  string $chr The character of which to calculate code point.
   *
   * @return int Unicode code point of the given character,<br />
   *         0 for empty input.
   */
  public static function ord($chr)
  {
    // "0" is falsy, but it is a valid character
    if (!$chr && $chr !== '0') {
      return 0;
    }

    // init
    self::checkForSupport();

    // prefer the "intl"-implementation, fall through if it gives no usable result
    if (self::$support['intlChar'] === true) {
      $codePoint = \IntlChar::ord($chr);
      if ($codePoint) {
        return $codePoint;
      }
    }

    // manual decoding: only the first (max.) four bytes are inspected
    $bytes = unpack('C*', substr($chr, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // four-byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three-byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two-byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte
    return $lead;
  }
4175
4176
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never sets variables
   *          in the current scope, the result is only written into "$result".
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string, it is passed through "UTF8::clean()" first.
   *                        </p>
   * @param array  $result  <p>
   *                        The result will be returned into this reference parameter.
   *                        </p>
   *
   * @return bool will return false if php can't parse the string and we haven't any $result
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    $parsed = \mb_parse_str(self::clean($str), $result);

    // success means: the parser did not fail AND there is something in the result
    return $parsed !== false && !empty($result);
  }
4207
4208 24
  /**
   * Checks if the "u"-modifier, which enables Unicode support in PCRE, is usable.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // an (empty) test-pattern with the "u"-modifier, errors are silenced on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matchResult = @preg_match('//u', '');

    return (bool)$matchResult;
  }
4218 24
4219 24
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved in this order: a string of decimal digits is used
   * as code point, a string of hexadecimal digits is converted via
   * "UTF8::hex_to_int()", everything else is handed over to "UTF8::ord()".
   *
   * @param  mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param  mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return array The characters of the range, an empty array if a boundary is empty or can't be resolved.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve the start- and the end-boundary with the very same rules
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($bounds[0], $bounds[1]));
  }
4265
4266 24
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the "$bom"-table (BOM-string => byte-length) is checked
   * against the beginning of the string, one after another.
   *
   * @param string $str
   *
   * @return string
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $marker => $markerLength) {
      // only a BOM at the very beginning of the string is removed
      if (strpos($str, $marker) !== 0) {
        continue;
      }

      $str = substr($str, $markerLength);
    }

    return $str;
  }
4283
4284 3
  /**
   * CamelCase-alias for "UTF8::remove_bom()", the call is forwarded
   * without any changes.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4297
4298
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * e.g.: "a    b" with $what = " " becomes "a b"
   *
   * @param    string       $str  The base string
   * @param    string|array $what String (or a list of strings) to search for in the base string,
   *                              other types are ignored
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        // Collapse every run of "$item" into a single one. The captured group
        // is used as replacement (instead of "$item" itself), because a needle
        // like '$0' or '\1' would otherwise be interpreted as a back-reference.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4320 3
4321 3
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the url-encoded variants (e.g. "%00"), case-insensitive.
   * @param  string $replacement
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), so the
      // patterns must match upper-case hex digits as well
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is replaced anymore, because a removal can
    // create a new match (e.g. "%0%000" -> "%00")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4354
4355 13
  /**
   * Replace the diamond question mark (�) with the replacement.
   *
   * @param string $str
   * @param string $unknown The replacement, "?" by default.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the replacement character, once as byte-sequence and once as literal char
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );

    $replace = array(
        $unknown,
        $unknown,
    );

    return str_replace($search, $replace, $str);
  }
4377
4378
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped, if omitted (or empty)
   *                         all trailing separator- and control-characters are stripped
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // No usable char-list: strip separators (\pZ) and "other" / control chars (\pC).
    // The string "0" is falsy in PHP, but it is a valid char-list, so it must
    // not end up in this default branch.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    // build a regex character class (or alternation) from the given chars
    $chars = self::rxClass($chars);

    return preg_replace("/{$chars}+$/u", '', $str);
  }
4403
4404
  /**
   * Builds a regex fragment (character class and / or alternation) that
   * matches any of the characters of the given string.
   *
   * @param string $s     The characters that should be matched.
   * @param string $class Initial content of the character class.
   *
   * @return string The regex fragment, without delimiters.
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // index 0 collects the character class, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      // a hyphen is moved to the front of the character class
      if ('-' === $char) {
        $parts[0] = '-' . $parts[0];
        continue;
      }

      // items of one or two bytes are escaped for the regex
      if (!isset($char[2])) {
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      // longer items that are still a single character go into the class as they are
      if (1 === self::strlen($char)) {
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
4451 2
4452
  /**
   * Prints every value of the "$support"-table, one per line, e.g. for debugging.
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportValue) {
      echo $supportValue, "\n<br>";
    }
  }
4461
4462
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $char           The Unicode character to be encoded as numbered entity.
   * @param    bool   $keepAsciiChars Keep ASCII chars.
   *
   * @return   string The HTML numbered entity, an empty string for empty input.
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false)
  {
    // "0" is falsy in PHP, but it is a valid character (same check as in "UTF8::ord()")
    if (!$char && $char !== '0') {
      return '';
    }

    if (
        $keepAsciiChars === true
        &&
        self::isAscii($char) === true
    ) {
      return $char;
    }

    return '&#' . self::ord($char) . ';';
  }
4486
4487
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *                              (only used if PCRE with UTF-8 support is available).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // "." + "s"-modifier + "u"-modifier: every single character, newlines included
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: decode the byte-sequences by hand

      $byteCount = strlen($str);
      $i = 0;

      while ($i < $byteCount) {
        $lead = ord($str[$i]);

        // the lead-byte defines the expected width of the sequence
        if (($lead & 0x80) === 0x00) {
          $width = 1;
        } elseif (($lead & 0xE0) === 0xC0) {
          $width = 2;
        } elseif (($lead & 0xF0) === 0xE0) {
          $width = 3;
        } elseif (($lead & 0xF8) === 0xF0) {
          $width = 4;
        } else {
          $width = 0;
        }

        // the sequence must be complete and every following byte must be a continuation-byte
        $valid = $width > 0 && isset($str[$i + $width - 1]);
        for ($k = 1; $valid && $k < $width; $k++) {
          $valid = (ord($str[$i + $k]) & 0xC0) === 0x80;
        }

        if ($valid) {
          $chars[] = substr($str, $i, $width);
          $i += $width;
        } else {
          // skip the byte, nothing is added for invalid input
          $i++;
        }
      }
    }

    // group the single characters into chunks of "$length" characters
    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4566
4567 17
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks are done in this order: UTF-16 / UTF-32 (for binary input),
   * ASCII, UTF-8, "\mb_detect_encoding()" and finally a round-trip via "iconv()".
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      if (self::is_utf16($str) === 1) {
        return 'UTF-16LE';
      }

      if (self::is_utf16($str) === 2) {
        return 'UTF-16BE';
      }

      if (self::is_utf32($str) === 1) {
        return 'UTF-32LE';
      }

      if (self::is_utf32($str) === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $candidates = array(
        'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', 'ISO-8859-5',
        'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', 'ISO-8859-10',
        'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
        'WINDOWS-1251', 'WINDOWS-1252', 'WINDOWS-1254',
        'ISO-2022-JP', 'JIS', 'EUC-JP',
    );

    self::checkForSupport();
    $detected = \mb_detect_encoding($str, $candidates, true);
    if ($detected) {
      return $detected;
    }

    //
    // 5.) check via "iconv()"
    //
    // an encoding is accepted, if converting the string from that encoding
    // into itself does not change the string (compared via md5-hash)

    $checksum = md5($str);
    foreach (self::$iconvEncoding as $iconvName) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@iconv($iconvName, $iconvName, $str)) === $checksum) {
        return $iconvName;
      }
    }

    return false;
  }
4644
4645
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search value is quoted with "preg_quote()" and applied as a
   * case-insensitive UTF-8 pattern (modifiers "ui") via "preg_replace()".
   * An empty search value is turned into a pattern that can never match.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s); handed unchanged to "preg_replace()".
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();

    // build the patterns in a fresh array instead of mutating $search by reference
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // "^" followed by a look-behind for any char can never match
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // $count is filled directly by preg_replace()
    return preg_replace($patterns, $replace, $subject, -1, $count);
  }
4688 3
4689
  /**
   * Limit the number of characters in a string, but also after the next word.
   *
   * The string is cut to $length characters and then shortened to the last
   * complete (space separated) word before $strAddOn is appended. A string
   * that is not longer than $length is returned unchanged.
   *
   * @param  string $str
   * @param  int    $length
   * @param  string $strAddOn
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls directly behind a word -> drop only the trailing space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // UTF8::substr() may return false, so the result is normalized to a string
    $str = (string)self::substr($str, 0, $length);

    // remove the (possibly truncated) last word
    $words = explode(' ', $str);
    array_pop($words);
    $new_str = implode(' ', $words);

    if ($new_str === '') {
      // there was only a single word: hard cut
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    return $new_str . $strAddOn;
  }
4729
4730 2
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when $pad_length is not an integer, is
   * not positive, is shorter than the input, or when $pad_string is empty.
   *
   * @param    string $str        The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with; this also prevents a division by zero below
      return $str;
    }

    $diff = $pad_length - $str_length;

    // number of padding characters in front of / behind the input
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre_length = $diff;
        $post_length = 0;
        break;

      case STR_PAD_BOTH:
        // an odd remainder goes to the right side
        $pre_length = (int)floor($diff / 2);
        $post_length = $diff - $pre_length;
        break;

      case STR_PAD_RIGHT:
      default:
        $pre_length = 0;
        $post_length = $diff;
    }

    $pre = '';
    if ($pre_length > 0) {
      $pre = str_repeat($pad_string, (int)ceil($pre_length / $ps_length));
      $pre = (string)self::substr($pre, 0, $pre_length);
    }

    $post = '';
    if ($post_length > 0) {
      $post = str_repeat($pad_string, (int)ceil($post_length / $ps_length));
      $post = (string)self::substr($post, 0, $post_length);
    }

    return $pre . $str . $post;
  }
4775
4776 1
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4800
4801
  /**
   * INFO: this is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4835
4836 5
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split with UTF8::split(), the resulting array is shuffled
   * and joined again.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4851
4852 2
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4877
4878
  /**
   * Split a string into an array.
   *
   * The string is split into grapheme clusters; for $len greater than 1 every
   * $len consecutive clusters are joined into one array entry. A $len below 1
   * is handed to the native "str_split()".
   *
   * @param string $str
   * @param int    $len
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    if ($len < 1) {
      return str_split($str, $len);
    }

    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // group $len clusters each and glue every group back together
    return array_map('implode', array_chunk($clusters, $len));
  }
4917
4918
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in
   * base 2 without leading zeros; an empty string (or only NUL bytes) gives "0".
   *
   * INFO: the conversion is done byte by byte, because "base_convert()" works
   *       on floats internally and loses precision for longer inputs.
   *
   * @param  string $str The input string.
   *
   * @return string
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      // every byte contributes exactly 8 bits
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    // same notation as before: no leading zeros, but at least one digit
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4933
4934
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    $ascii = self::to_ascii($str, $unknown);

    return $ascii;
  }
4948
4949
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      The input string.
   * @param int    $format   <strong>0</strong> => return a number of words<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist Additional chars that contains to words and do not start a new word (default: "'", "’")
   *
   * @return array|int The number of words, or the list of words for $format 1 and 2
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');

    // the words are the captured delimiters, so they sit on the odd indexes
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    $len = count($strParts);

    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      self::checkForSupport();

      $words = array();
      // the first word starts behind the leading non-word part
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $words[$offset] = $strParts[$i];
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

      return $words;
    }

    return ($len - 1) / 2;
  }
4993
4994 65
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared
   * with UTF8::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5010
5011
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings are answered directly; everything else is compared
   * byte-wise after both values were normalized to NFD.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5028
5029
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str
   * @param string $charList
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null The number of characters in front of the first character
   *                  from $charList, or null if $charList is empty.
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    if (!$offset && 2147483647 === $length) {
      // defaults: work on the whole string
      $str = (string)$str;
    } else {
      $str = (string)self::substr($str, $offset, $length);
    }

    // lazily capture everything in front of the first char from the list
    $found = preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches);
    if (!$found) {
      return self::strlen($str);
    }

    /** @noinspection OffsetOperationsInspection */
    return self::strlen($matches[1]);
  }
5058
5059
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param  array $array Integer or Hexadecimal codepoints
   *
   * @return string UTF-8 encoded string
   */
  public static function string(array $array)
  {
    $str = '';

    // convert every code point with UTF8::chr() and append the result
    foreach ($array as $codePoint) {
      $str .= self::chr($codePoint);
    }

    return $str;
  }
5080
5081
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
5094
5095
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The candidates are the keys of self::$bom.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $str = (string)$str;

    foreach (self::$bom as $bomString => $bomByteLength) {
      $bomString = (string)$bomString;

      // only the prefix is compared, the rest of the string is never scanned
      if (strncmp($str, $bomString, strlen($bomString)) === 0) {
        return true;
      }
    }

    return false;
  }
5112
5113 10
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               You can use the optional second parameter to specify tags which should
   *                               not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // broken utf8 is cleaned first, the tags are stripped afterwards
    return strip_tags(self::clean($str), $allowable_tags);
  }
5139
5140
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching; null is treated as 0
   *                           </p>
   * @param string  $encoding  The charset for "\mb_stripos()"; a boolean is
   *                           accepted as a fallback for old versions and means "UTF-8".
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found (or if haystack or needle is empty).
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalizeEncoding($encoding);
    }

    // "\mb_stripos()" expects an integer offset, so the null default becomes 0
    return \mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
5188 1
5189
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * The search is done case-insensitive via "\mb_stristr()" with "UTF-8".
   *
   * @param string $str
   * @param string $needle
   * @param bool   $before_needle
   *
   * @return false|string sub-string, or false if needle is not found (or empty)
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle .= '';
    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    return \mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
5209
5210
  /**
   * Get the string length, not the byte-length!
   *
   * INFO: for the encodings "ASCII" and "CP850" the native "strlen()" is used.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Set the charset for e.g. "\mb_" function
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only used for "UTF-8")
   *
   * @return int the number of characters in the string $str having character encoding $encoding. (One multi-byte character counted as +1)
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    // INFO: this is only a fallback for old versions
    $encoding = ($encoding === true || $encoding === false)
        ? 'UTF-8'
        : self::normalizeEncoding($encoding);

    // single-byte encodings: the byte-length is the string length
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    self::checkForSupport();

    if ($encoding === 'UTF-8' && $cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5250
5251 1
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5267
5268
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   *
   * @return int Similar to other string comparison functions, this one returns &lt; 0 if
   *             str1 is less than str2; &gt; 0 if str1 is greater than
   *             str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    // identical strings do not need to be folded
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5291
5292 9
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     The length of strings to be used in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   *             <i>str2</i>; &gt; 0 if <i>str1</i> is
   *             greater than <i>str2</i>, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5315
5316
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut with UTF8::substr() and compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     Number of characters to use in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   *             <i>str2</i>; &gt; 0 if <i>str1</i>
   *             is greater than <i>str2</i>, and 0 if they are
   *             equal.
   */
  public static function strncmp($str1, $str2, $len)
  {
    // UTF8::substr() may return false, UTF8::strcmp() expects strings
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5343
5344
  /**
5345 6
   * Search a string for any of a set of characters
5346
   *
5347
   * @link  http://php.net/manual/en/function.strpbrk.php
5348
   *
5349
   * @param string $haystack  <p>
5350
   *                          The string where char_list is looked for.
5351
   *                          </p>
5352
   * @param string $char_list <p>
5353
   *                          This parameter is case sensitive.
5354
   *                          </p>
5355
   *
5356
   * @return string a string starting from the character found, or false if it is not found.
5357
   */
5358
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // An empty haystack or an empty character list can never produce a match.
    if (!isset($haystack[0]) || !isset($char_list[0])) {
      return false;
    }

    // Build a pattern from the character list and look for its first hit.
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $found)) {
      return false;
    }

    // Return everything from the first occurrence of the matched text onwards.
    return substr($haystack, strpos($haystack, $found[0]));
  }
5373
5374
  /**
5375
   * Find position of first occurrence of string in a string.
5376
   *
5377
   * @link http://php.net/manual/en/function.mb-strpos.php
5378
   *
5379
   * @param string  $haystack     <p>
5380
   *                              The string being checked.
5381
   *                              </p>
5382
   * @param string  $needle       <p>
5383 10
   *                              The string to find in haystack.
5384
   *                              </p>
5385 10
   * @param int     $offset       [optional] <p>
5386 10
   *                              The search offset. If it is not specified, 0 is used.
5387 10
   *                              </p>
5388
   * @param string  $encoding
5389 10
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string.
5390 1
   *
5391 1
   * @return int The numeric position of the first occurrence of needle in the haystack string.<br />
5392 1
   *             If needle is not found it returns false.
5393
   */
5394 10
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support an integer $needle, so a non-negative
    // integer is converted via chr() first. This check has to run BEFORE the
    // string cast below: once $needle is a string, the strict comparison
    // against its integer value can never be true.
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    // Nothing can be found in (or with) an empty string.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions;
      // a boolean in the $encoding slot is replaced by the default encoding
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalizeEncoding($encoding);
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback: drop the first $offset characters, search byte-wise and
    // convert the byte position back into a character position

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5452
5453
  /**
5454
   * Finds the last occurrence of a character in a string within another.
5455
   *
5456
   * @link http://php.net/manual/en/function.mb-strrchr.php
5457
   *
5458
   * @param string $haystack <p>
5459
   *                         The string from which to get the last occurrence
5460
   *                         of needle
5461
   *                         </p>
5462 16
   * @param string $needle   <p>
5463
   *                         The string to find in haystack
5464 16
   *                         </p>
5465
   * @param bool   $part     [optional] <p>
5466 16
   *                         Determines which portion of haystack
5467 4
   *                         this function returns.
5468
   *                         If set to true, it returns all of haystack
5469
   *                         from the beginning to the last occurrence of needle.
5470
   *                         If set to false, it returns all of haystack
5471 15
   *                         from the last occurrence of needle to the end,
5472
   *                         </p>
5473 15
   * @param string $encoding [optional] <p>
5474 15
   *                         Character encoding name to use.
5475
   *                         If it is omitted, internal character encoding is used.
5476
   *                         </p>
5477
   *
5478
   * @return string the portion of haystack.
5479
   * or false if needle is not found.
5480
   */
5481 View Code Duplication
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    // Normalize the encoding name and hand everything over to mb_strrchr().
    return \mb_strrchr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5488
5489
  /**
5490
   * Reverses characters order in the string.
5491
   *
5492
   * @param    string $str The input string
5493
   *
5494
   * @return   string The string with characters in the reverse sequence
5495
   */
5496
  public static function strrev($str)
  {
    // Split into pieces via split(), flip their order and glue them back together.
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode($reversed);
  }
5500
5501
  /**
5502
   * Finds the last occurrence of a character in a string within another, case insensitive.
5503 1
   *
5504
   * @link http://php.net/manual/en/function.mb-strrichr.php
5505 1
   *
5506
   * @param string $haystack <p>
5507
   *                         The string from which to get the last occurrence
5508
   *                         of needle
5509
   *                         </p>
5510
   * @param string $needle   <p>
5511
   *                         The string to find in haystack
5512
   *                         </p>
5513
   * @param bool   $part     [optional] <p>
5514
   *                         Determines which portion of haystack
5515
   *                         this function returns.
5516
   *                         If set to true, it returns all of haystack
5517
   *                         from the beginning to the last occurrence of needle.
5518
   *                         If set to false, it returns all of haystack
5519
   *                         from the last occurrence of needle to the end,
5520 1
   *                         </p>
5521
   * @param string $encoding [optional] <p>
5522
   *                         Character encoding name to use.
5523
   *                         If it is omitted, internal character encoding is used.
5524
   *                         </p>
5525
   *
5526
   * @return string the portion of haystack.
5527
   * or false if needle is not found.
5528
   */
5529 View Code Duplication
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    // Normalize the encoding name and hand everything over to mb_strrichr().
    return \mb_strrichr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5536
5537
  /**
5538
   * Find position of last occurrence of a case-insensitive string.
5539
   *
5540
   * @param    string $haystack The string to look in
5541
   * @param    string $needle   The string to look for
5542
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
5543
   *
5544
   * @return   int|false The position of the last case-insensitive occurrence of needle, or false if it is not found
5545
   */
5546
  public static function strripos($haystack, $needle, $offset = 0)
  {
    // Lowercase both operands so that the case-sensitive search becomes case-insensitive.
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
5550
5551
  /**
5552
   * Find position of last occurrence of a string in a string.
5553
   *
5554
   * @link http://php.net/manual/en/function.mb-strrpos.php
5555
   *
5556
   * @param string     $haystack  <p>
5557
   *                              The string being checked, for the last occurrence
5558 39
   *                              of needle
5559
   *                              </p>
5560 39
   * @param string|int $needle    <p>
5561
   *                              The string to find in haystack.
5562 39
   *                              Or a code point as int.
5563 9
   *                              </p>
5564
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters into
5565
   *                              the string. Negative values will stop searching at an arbitrary point
5566
   *                              prior to the end of the string.
5567 37
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
5568
   *
5569 37
   * @return int the numeric position of
5570
   * the last occurrence of needle in the
5571
   * haystack string. If
5572
   * needle is not found, it returns false.
5573 1
   */
5574 1
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // A non-negative integer needle is converted via chr() before the string cast.
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }
    $needle = (string)$needle;

    // Nothing can be found in (or with) an empty string.
    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: cut the haystack according to the offset, search byte-wise
    // and translate the byte position into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    $prefix = substr($haystack, 0, $bytePos);

    // negative offset not supported in PHP strpos(), ignoring
    return ($offset > 0 ? $offset : 0) + self::strlen($prefix);
  }
5626
5627
  /**
5628
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5629
   * mask.
5630
   *
5631
   * @param string $str
5632
   * @param string $mask
5633
   * @param int    $offset
5634
   * @param int    $length
5635
   *
5636
   * @return int|null
5637
   */
5638
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // Only cut the string when the caller actually narrowed the range.
    if ($offset || 2147483647 !== $length) {
      $str = self::substr($str, $offset, $length);
    }

    // Match the longest leading run made up of mask characters only.
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $prefix)) {
      return 0;
    }

    return self::strlen($prefix[0]);
  }
5646
5647
  /**
5648
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5649
   *
5650
   * @link http://php.net/manual/en/function.grapheme-strstr.php
5651
   *
5652
   * @param string $haystack      <p>
5653
   *                              The input string. Must be valid UTF-8.
5654
   *                              </p>
5655
   * @param string $needle        <p>
5656
   *                              The string to look for. Must be valid UTF-8.
5657
   *                              </p>
5658
   * @param bool   $before_needle [optional] <p>
5659
   *                              If <b>TRUE</b>, grapheme_strstr() returns the part of the
5660
   *                              haystack before the first occurrence of the needle (excluding the needle).
5661
   *                              </p>
5662
   *
5663
   * @return string the portion of string, or FALSE if needle is not found.
5664
   */
5665 6
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    // The search itself is delegated to grapheme_strstr().
    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5671
5672 1
  /**
5673 1
   * Unicode transformation for case-less matching.
5674 1
   *
5675 1
   * @link http://unicode.org/reports/tr21/tr21-5.html
5676
   *
5677
   * @param string $str
5678
   * @param bool   $full
5679 1
   *
5680 1
   * @return string
5681 1
   */
5682 1
  public static function strtocasefold($str, $full = true)
  {
    // Lookup tables are built on first use and kept for all later calls.
    static $fullFoldTable = null;
    static $commonKeys = null;
    static $commonValues = null;

    if ($commonKeys === null) {
      $commonKeys = array_keys(self::$commonCaseFold);
      $commonValues = array_values(self::$commonCaseFold);
    }

    // First pass: the common case-folding replacements.
    $folded = str_replace($commonKeys, $commonValues, $str);

    if ($full) {
      // Second pass (optional): the full case-folding table, loaded lazily.
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    // Clean the result and lowercase it as the final step.
    return self::strtolower(self::clean($folded));
  }
5709 6
5710 1
  /**
5711 1
   * (PHP 4 &gt;= 4.3.0, PHP 5)<br/>
5712 1
   * Make a string lowercase.
5713 1
   *
5714
   * @link http://php.net/manual/en/function.mb-strtolower.php
5715 1
   *
5716
   * @param string $str <p>
5717
   *                    The string being lowercased.
5718 6
   *                    </p>
5719 6
   * @param string $encoding
5720
   *
5721 6
   * @return string str with all alphabetic characters converted to lowercase.
5722 4
   */
5723
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Short-circuit on the empty string.
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    return \mb_strtolower($str, self::normalizeEncoding($encoding));
  }
5737
5738
  /**
5739
   * Generic case sensitive transformation for collation matching.
5740 1
   *
5741
   * @param string $s
5742 1
   *
5743
   * @return string
5744 1
   */
5745 1
  protected static function strtonatfold($s)
  {
    // Decompose to NFD, then strip every run of non-spacing marks (\p{Mn}).
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5749
5750 1
  /**
5751 1
   * Make a string uppercase.
5752
   *
5753 1
   * @link http://php.net/manual/en/function.mb-strtoupper.php
5754
   *
5755 1
   * @param string $str <p>
5756 1
   *                    The string being uppercased.
5757
   *                    </p>
5758 1
   * @param string $encoding
5759
   *
5760 1
   * @return string str with all alphabetic characters converted to uppercase.
5761
   */
5762 1
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // Replacement tables for the fallback path, built once on first use.
    static $caseTableKeys = null;
    static $caseTableValues = null;

    $str = (string)$str;

    // Short-circuit on the empty string.
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, self::normalizeEncoding($encoding));
    }

    // fallback: replace via the case table
    if ($caseTableKeys === null) {
      $caseTable = self::case_table();
      $caseTableKeys = array_keys($caseTable);
      $caseTableValues = array_values($caseTable);
    }

    return str_replace($caseTableKeys, $caseTableValues, self::clean($str));
  }
5795
5796
  /**
5797
   * Translate characters or replace sub-strings.
5798
   *
5799
   * @link  http://php.net/manual/en/function.strtr.php
5800
   *
5801
   * @param string       $str  <p>
5802
   *                           The string being translated.
5803
   *                           </p>
5804
   * @param string|array $from <p>
5805
   *                           The string replacing from.
5806
   *                           </p>
5807
   * @param string|array $to   <p>
5808
   *                           The string being translated to.
5809
   *                           </p>
5810
   *
5811
   * @return string This function returns a copy of str,
5812 7
   * translating all occurrences of each character in
5813
   * from to the corresponding character in
5814 7
   * to.
5815
   */
5816 7
  public static function strtr($str, $from, $to = INF)
  {
    // Two-argument form: $from is passed straight through to strtr().
    if (INF === $to) {
      return strtr($str, $from);
    }

    // Three-argument form: split both operands and pair them up one by one.
    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);

    // Only as many pairs as the shorter list provides can be mapped;
    // surplus entries of the longer list are dropped.
    $pairs = min(count($fromChars), count($toChars));
    $map = array_combine(
        array_slice($fromChars, 0, $pairs),
        array_slice($toChars, 0, $pairs)
    );

    return strtr($str, $map);
  }
5835 3
5836
  /**
5837
   * Return the width of a string.
5838 3
   *
5839 3
   * @param string $s
5840 3
   *
5841
   * @return int
5842
   */
5843
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    // The width is computed by mb_strwidth() with a fixed UTF-8 encoding.
    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5850
5851
  /**
5852 3
   * Get part of a string.
5853
   *
5854 1
   * @link http://php.net/manual/en/function.mb-substr.php
5855 1
   *
5856 1
   * @param string  $str       <p>
5857
   *                           The string being checked.
5858 1
   *                           </p>
5859 1
   * @param int     $start     <p>
5860 1
   *                           The first position used in str.
5861 1
   *                           </p>
5862
   * @param int     $length    [optional] <p>
5863 1
   *                           The maximum length of the returned string.
5864
   *                           </p>
5865
   * @param string  $encoding
5866 1
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
5867
   *
5868
   * @return string|false Returns the sub-string specified by the start and length parameters, or false if start lies beyond the end of the string.
5869 1
   */
5870
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Short-circuit on the empty string.
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // The character count is only needed to range-check $start
    // or to supply the default for $length.
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    // A start position beyond the end of the string yields false.
    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    if (self::$support['mbstring'] === true) {
      // INFO: this is only a fallback for old versions;
      // a boolean in the $encoding slot is replaced by the default encoding
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback: split into an array, take the requested slice
    // and join it back into a string
    return implode(array_slice(self::split($str), $start, $length));
  }
5927
5928
  /**
5929 20
   * Binary safe comparison of two strings from an offset, up to length characters.
5930
   *
5931 20
   * @param string  $main_str           The main string being compared.
5932 2
   * @param string  $str                The secondary string being compared.
5933
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
5934 2
   *                                    end of the string.
5935 2
   * @param int     $length             The length of the comparison. The default value is the largest of the length of
5936
   *                                    the str compared to the length of main_str less the offset.
5937 2
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
5938
   *
5939
   * @return int
5940 20
   */
5941
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // Cut the requested window out of the main string ...
    $main_str = self::substr($main_str, $offset, $length);

    // ... and trim the second string to the same number of characters.
    $str = self::substr($str, 0, self::strlen($main_str));

    if ($case_insensitivity === true) {
      return self::strcasecmp($main_str, $str);
    }

    return self::strcmp($main_str, $str);
  }
5948 20
5949
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack <p>
   *                           The string to search in
   *                           </p>
   * @param string   $needle   <p>
   *                           The substring to search for
   *                           </p>
   * @param int      $offset   [optional] <p>
   *                           The offset where to start counting
   *                           </p>
   * @param int|null $length   [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. When omitted (null), the search runs from the offset
   *                           to the end of the haystack.
   *                           </p>
   *
   * @return int|false The number of occurrences; false if the haystack or the needle is empty,
   *                   if a negative offset is given without a length, or if offset + length
   *                   is not positive.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      if ($length === null) {
        // Only an offset was given. Casting the missing length to 0 would cut
        // the haystack down to an empty string and always yield 0 matches,
        // so search from the offset to the end instead.
        if ($offset < 0) {
          return false;
        }

        // substr() may return false; the cast turns that into an empty haystack
        $haystack = (string)self::substr($haystack, $offset);
      } else {
        $length = (int)$length;

        if ($length + $offset <= 0) {
          return false;
        }

        // substr() may return false; the cast turns that into an empty haystack
        $haystack = (string)self::substr($haystack, $offset, $length);
      }
    }

    self::checkForSupport();

    return \mb_substr_count($haystack, $needle);
  }
5995
5996
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, the replacement is applied to every element; $replacement,
   * $start and $length may then be arrays too (entry N is used for string N) or
   * scalars (the same value is used for every string).
   *
   * @param string|array   $str         The input string, or an array of input strings.
   * @param string|array   $replacement The replacement string(s). For a string $str and an array
   *                                    $replacement, only the first element is used.
   * @param int|array      $start       Character offset(s) at which the replacement starts.
   * @param null|int|array $length      Number of characters to replace. For a string $str, null
   *                                    means "the whole character length of $str". For an array
   *                                    $str, null means 0 for every element.
   *
   * @return array|string The resulting string, or an array of strings if $str was an array.
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          // non-integer offsets fall back to 0
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            // non-integer lengths fall back to the number of input strings
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    if (is_array($replacement)) {
      if (count($replacement) > 0) {
        // reset() returns the first element regardless of how the array is keyed
        $replacement = reset($replacement);
      } else {
        $replacement = '';
      }
    }

    // split both strings into arrays of single UTF-8 characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      self::checkForSupport();

      $length = \mb_strlen((string)$str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // glue first, pieces second: the legacy reversed argument order is
    // deprecated since PHP 7.4 and no longer accepted by PHP 8
    return implode('', $smatches[0]);
  }
6073 26
6074
  /**
   * Swap the case of every non-whitespace character of a string.
   *
   * A character that is already equal to its upper-case form is lower-cased,
   * every other character is upper-cased.
   *
   * @param string $str      The input string.
   * @param string $encoding The encoding handed to the case-conversion helpers.
   *
   * @return string The string with each character's case swapped.
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $encoding = self::normalizeEncoding($encoding);
    $str = self::clean($str);

    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          if ($char !== $upper) {
            return $upper;
          }

          return UTF8::strtolower($char, $encoding);
        },
        $str
    );
  }
6109
6110
  /**
   * Alias of "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         The input string, e.g. a UTF-8 string.
   * @param string $subst_chr Passed through as the second argument of to_ascii().
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
6124 2
6125
  /**
   * Alias of "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string|array $str The value handed to to_latin1().
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
6138
6139 7
  /**
   * Alias of "UTF8::to_utf8()".
   *
   * @see UTF8::to_utf8()
   *
   * @param string|array $str The value handed to to_utf8().
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
6152 7
6153
  /**
   * Convert a string to ASCII.
   *
   * The string is cleaned first. If the "intl" support flag is set and PHP >= 5.4 is
   * running, it is transliterated with "Any-Latin; Latin-ASCII;" and returned right away
   * when no byte >= 0x80 is left. Otherwise every remaining multi-byte character is
   * decoded to its code point and looked up in the per-bank tables under "data/xNN.php".
   *
   * @param string $str     The input string.
   * @param string $unknown Replacement for characters without a table entry. (default is ?)
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?')
  {
    // lookup tables, loaded bank by bank on first use and kept across calls
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    self::checkForSupport();
    if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
      $transliterated = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

      // transliterator_transliterate() returns false on failure; in that case
      // keep the cleaned input and fall through to the table lookup below
      if ($transliterated !== false) {
        $str = $transliterated;

        // check again, if we only have ASCII, now ...
        if (!preg_match("/[\x80-\xFF]/", $str)) {
          return $str;
        }
      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // reset per character, so a code point decoded in an earlier iteration
      // can never be reused for a character that could not be decoded
      $ord = null;
      $byteCount = strlen($c);

      if ($ordC0 >= 192 && $ordC0 <= 223 && $byteCount >= 2) {
        $ord = ($ordC0 - 192) * 64
               + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239 && $byteCount >= 3) {
        $ord = ($ordC0 - 224) * 4096
               + (ord($c[1]) - 128) * 64
               + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247 && $byteCount >= 4) {
        $ord = ($ordC0 - 240) * 262144
               + (ord($c[1]) - 128) * 4096
               + (ord($c[2]) - 128) * 64
               + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251 && $byteCount >= 5) {
        $ord = ($ordC0 - 248) * 16777216
               + (ord($c[1]) - 128) * 262144
               + (ord($c[2]) - 128) * 4096
               + (ord($c[3]) - 128) * 64
               + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253 && $byteCount >= 6) {
        $ord = ($ordC0 - 252) * 1073741824
               + (ord($c[1]) - 128) * 16777216
               + (ord($c[2]) - 128) * 262144
               + (ord($c[3]) - 128) * 4096
               + (ord($c[4]) - 128) * 64
               + (ord($c[5]) - 128);
      }

      // lead bytes 254/255, stray continuation bytes (128-191) and
      // truncated sequences cannot be decoded
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the upper bits of the code point select the table ("bank") ...
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the data file is expected to fill $UTF8_TO_ASCII[$bank] - confirm
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        // no data file, or the file did not define this bank
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // ... and the lowest 8 bits select the entry within that table
      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference loop
    unset($c);

    return implode('', $chars);
  }
6264
6265
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @see UTF8::to_win1252()
   *
   * @param string|array $str The value handed to to_win1252().
   *
   * @return array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6278
6279
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @see UTF8::to_win1252()
   *
   * @param string|array $str The value handed to to_win1252().
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6292
6293
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte-level conversion, "\uXXXX" escape sequences and numeric entities
   * of the form "&#NN;" .. "&#NNNN;" are decoded as well.
   *
   * @param string|array $str Any string or array. Arrays are converted element by element.
   *
   * @return string|array The same string (or array of strings), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 7-bit byte: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      // 0x80-0xBF outside of a multi-byte sequence: needs conversion
      if ($ordC1 < 0xC0) {
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          // encode the byte as a two-byte sequence (upper two bits, lower six bits)
          $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
        }
        continue;
      }

      // >= 0xC0: should be converted to UTF8, if it's not UTF8 already
      if ($ordC1 <= 0xDF) {
        $sequenceLength = 2; // looks like 2 bytes UTF8
      } elseif ($ordC1 <= 0xEF) {
        $sequenceLength = 3; // looks like 3 bytes UTF8
      } elseif ($ordC1 <= 0xF7) {
        $sequenceLength = 4; // looks like 4 bytes UTF8
      } else {
        $sequenceLength = 0; // doesn't look like UTF8, but should be converted
      }

      // every expected follow-up byte must exist and lie in 0x80-0xBF
      $isSequence = $sequenceLength > 0 && $i + $sequenceLength <= $max;
      for ($j = 1; $isSequence && $j < $sequenceLength; $j++) {
        $ordNext = ord($str[$i + $j]);
        $isSequence = $ordNext >= 0x80 && $ordNext <= 0xBF;
      }

      if ($isSequence) { // yeah, almost sure it's UTF8 already
        $buf .= substr($str, $i, $sequenceLength);
        $i += $sequenceLength - 1;
      } else { // not valid UTF8 - convert it
        // integer shift instead of a float division: chr() wants an integer
        $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6420
6421
  /**
   * Convert a string into "win1252"-encoding.
   *
   * Arrays are converted element by element (keys are kept); a non-empty
   * string is handed to utf8_decode().
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  protected static function to_win1252($str)
  {
    if (!is_array($str)) {
      $str = (string)$str;

      if (isset($str[0])) {
        return self::utf8_decode($str);
      }

      return '';
    }

    $converted = array();
    foreach ($str as $key => $value) {
      $converted[$key] = self::to_win1252($value);
    }

    return $converted;
  }
6448 5
6449
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * The native function could only be used for pure 7-bit (ASCII) input, but checking
   * for ASCII costs more time than it would save here.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when it is left at its
   *                         default (INF) or is empty, the Unicode categories Z and C
   *                         are stripped instead
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($chars !== INF && $chars) {
      // explicit character list: trim the left side first, then the right side
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6477
6478
  /**
   * Makes string's first char uppercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $firstChar = self::substr($str, 0, 1);
    $remainder = self::substr($str, 1);

    // substr() can return false (flagged by static analysis); treat that as an
    // empty string instead of handing a boolean on
    if ($firstChar === false) {
      $firstChar = '';
    }

    if ($remainder === false) {
      $remainder = '';
    }

    return self::strtoupper($firstChar) . $remainder;
  }
6489
6490
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word The value handed to ucfirst().
   *
   * @return string
   */
  public static function ucword($word)
  {
    $capitalized = self::ucfirst($word);

    return $capitalized;
  }
6503
6504
  /**
   * Uppercase the first character of all words in the string.
   *
   * Words are separated by a single space character. Words listed in $exceptions are
   * left untouched; the first character of the whole result is upper-cased in any case.
   *
   * @param  string $str        The input string.
   * @param  array  $exceptions Words (strict comparison) that must not be changed.
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // test for "empty string" rather than "falsy", so that the string "0" is kept
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (explode(' ', $str) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
6545 1
6546 1
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Steps: url-decode, rewrite "%uXXXX" escapes as hexadecimal entities, convert to
   * UTF-8, decode html entities, raw-url-decode, and finally run fix_simple_utf8().
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $urlDecoded = urldecode($str);
    $str = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $urlDecoded);

    // ENT_HTML5 is only used when running on PHP >= 5.4
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    $utf8 = self::to_utf8($str);
    $entityDecoded = self::html_entity_decode($utf8, $flags);
    $rawDecoded = rawurldecode($entityDecoded);
    $fixed = self::fix_simple_utf8($rawDecoded);

    return (string)$fixed;
  }
6586 4
6587
  /**
6588
   * Return an array that maps "urlencoded" win1252 sequences to UTF-8 characters.
6589
   *
6590
   * @return mixed
6591 4
   */
6592 4
  public static function urldecode_fix_win1252_chars()
6593
  {
6594 4
    static $array = array(
6595 4
        '%20' => ' ',
6596 4
        '%21' => '!',
6597 4
        '%22' => '"',
6598 4
        '%23' => '#',
6599
        '%24' => '$',
6600 4
        '%25' => '%',
6601 4
        '%26' => '&',
6602 4
        '%27' => "'",
6603 4
        '%28' => '(',
6604
        '%29' => ')',
6605 4
        '%2A' => '*',
6606 3
        '%2B' => '+',
6607 3
        '%2C' => ',',
6608 3
        '%2D' => '-',
6609 3
        '%2E' => '.',
6610
        '%2F' => '/',
6611 3
        '%30' => '0',
6612
        '%31' => '1',
6613
        '%32' => '2',
6614
        '%33' => '3',
6615 3
        '%34' => '4',
6616 3
        '%35' => '5',
6617
        '%36' => '6',
6618 4
        '%37' => '7',
6619
        '%38' => '8',
6620
        '%39' => '9',
6621
        '%3A' => ':',
6622
        '%3B' => ';',
6623
        '%3C' => '<',
6624
        '%3D' => '=',
6625
        '%3E' => '>',
6626
        '%3F' => '?',
6627
        '%40' => '@',
6628
        '%41' => 'A',
6629
        '%42' => 'B',
6630
        '%43' => 'C',
6631
        '%44' => 'D',
6632
        '%45' => 'E',
6633
        '%46' => 'F',
6634
        '%47' => 'G',
6635
        '%48' => 'H',
6636
        '%49' => 'I',
6637
        '%4A' => 'J',
6638
        '%4B' => 'K',
6639
        '%4C' => 'L',
6640
        '%4D' => 'M',
6641
        '%4E' => 'N',
6642
        '%4F' => 'O',
6643
        '%50' => 'P',
6644
        '%51' => 'Q',
6645
        '%52' => 'R',
6646
        '%53' => 'S',
6647
        '%54' => 'T',
6648
        '%55' => 'U',
6649
        '%56' => 'V',
6650
        '%57' => 'W',
6651
        '%58' => 'X',
6652
        '%59' => 'Y',
6653
        '%5A' => 'Z',
6654
        '%5B' => '[',
6655
        '%5C' => '\\',
6656
        '%5D' => ']',
6657
        '%5E' => '^',
6658
        '%5F' => '_',
6659
        '%60' => '`',
6660
        '%61' => 'a',
6661
        '%62' => 'b',
6662
        '%63' => 'c',
6663
        '%64' => 'd',
6664
        '%65' => 'e',
6665
        '%66' => 'f',
6666
        '%67' => 'g',
6667
        '%68' => 'h',
6668
        '%69' => 'i',
6669
        '%6A' => 'j',
6670
        '%6B' => 'k',
6671
        '%6C' => 'l',
6672
        '%6D' => 'm',
6673
        '%6E' => 'n',
6674
        '%6F' => 'o',
6675
        '%70' => 'p',
6676
        '%71' => 'q',
6677
        '%72' => 'r',
6678
        '%73' => 's',
6679
        '%74' => 't',
6680
        '%75' => 'u',
6681
        '%76' => 'v',
6682
        '%77' => 'w',
6683
        '%78' => 'x',
6684
        '%79' => 'y',
6685
        '%7A' => 'z',
6686
        '%7B' => '{',
6687
        '%7C' => '|',
6688
        '%7D' => '}',
6689
        '%7E' => '~',
6690
        '%7F' => '',
6691
        '%80' => '`',
6692
        '%81' => '',
6693
        '%82' => '‚',
6694
        '%83' => 'ƒ',
6695
        '%84' => '„',
6696
        '%85' => '…',
6697
        '%86' => '†',
6698
        '%87' => '‡',
6699
        '%88' => 'ˆ',
6700
        '%89' => '‰',
6701
        '%8A' => 'Š',
6702
        '%8B' => '‹',
6703
        '%8C' => 'Œ',
6704
        '%8D' => '',
6705
        '%8E' => 'Ž',
6706
        '%8F' => '',
6707
        '%90' => '',
6708
        '%91' => '‘',
6709
        '%92' => '’',
6710
        '%93' => '“',
6711
        '%94' => '”',
6712
        '%95' => '•',
6713
        '%96' => '–',
6714
        '%97' => '—',
6715
        '%98' => '˜',
6716
        '%99' => '™',
6717
        '%9A' => 'š',
6718
        '%9B' => '›',
6719
        '%9C' => 'œ',
6720
        '%9D' => '',
6721
        '%9E' => 'ž',
6722
        '%9F' => 'Ÿ',
6723
        '%A0' => '',
6724
        '%A1' => '¡',
6725
        '%A2' => '¢',
6726
        '%A3' => '£',
6727
        '%A4' => '¤',
6728
        '%A5' => '¥',
6729
        '%A6' => '¦',
6730
        '%A7' => '§',
6731
        '%A8' => '¨',
6732
        '%A9' => '©',
6733
        '%AA' => 'ª',
6734
        '%AB' => '«',
6735
        '%AC' => '¬',
6736
        '%AD' => '',
6737
        '%AE' => '®',
6738
        '%AF' => '¯',
6739
        '%B0' => '°',
6740
        '%B1' => '±',
6741
        '%B2' => '²',
6742
        '%B3' => '³',
6743
        '%B4' => '´',
6744
        '%B5' => 'µ',
6745
        '%B6' => '¶',
6746
        '%B7' => '·',
6747
        '%B8' => '¸',
6748
        '%B9' => '¹',
6749
        '%BA' => 'º',
6750
        '%BB' => '»',
6751
        '%BC' => '¼',
6752
        '%BD' => '½',
6753
        '%BE' => '¾',
6754
        '%BF' => '¿',
6755
        '%C0' => 'À',
6756
        '%C1' => 'Á',
6757
        '%C2' => 'Â',
6758
        '%C3' => 'Ã',
6759
        '%C4' => 'Ä',
6760
        '%C5' => 'Å',
6761
        '%C6' => 'Æ',
6762
        '%C7' => 'Ç',
6763
        '%C8' => 'È',
6764
        '%C9' => 'É',
6765
        '%CA' => 'Ê',
6766
        '%CB' => 'Ë',
6767
        '%CC' => 'Ì',
6768
        '%CD' => 'Í',
6769
        '%CE' => 'Î',
6770
        '%CF' => 'Ï',
6771
        '%D0' => 'Ð',
6772
        '%D1' => 'Ñ',
6773
        '%D2' => 'Ò',
6774
        '%D3' => 'Ó',
6775
        '%D4' => 'Ô',
6776
        '%D5' => 'Õ',
6777
        '%D6' => 'Ö',
6778
        '%D7' => '×',
6779
        '%D8' => 'Ø',
6780
        '%D9' => 'Ù',
6781
        '%DA' => 'Ú',
6782
        '%DB' => 'Û',
6783
        '%DC' => 'Ü',
6784
        '%DD' => 'Ý',
6785
        '%DE' => 'Þ',
6786
        '%DF' => 'ß',
6787
        '%E0' => 'à',
6788
        '%E1' => 'á',
6789
        '%E2' => 'â',
6790
        '%E3' => 'ã',
6791
        '%E4' => 'ä',
6792
        '%E5' => 'å',
6793
        '%E6' => 'æ',
6794
        '%E7' => 'ç',
6795
        '%E8' => 'è',
6796
        '%E9' => 'é',
6797
        '%EA' => 'ê',
6798
        '%EB' => 'ë',
6799
        '%EC' => 'ì',
6800
        '%ED' => 'í',
6801
        '%EE' => 'î',
6802
        '%EF' => 'ï',
6803
        '%F0' => 'ð',
6804
        '%F1' => 'ñ',
6805
        '%F2' => 'ò',
6806
        '%F3' => 'ó',
6807
        '%F4' => 'ô',
6808
        '%F5' => 'õ',
6809
        '%F6' => 'ö',
6810
        '%F7' => '÷',
6811
        '%F8' => 'ø',
6812
        '%F9' => 'ù',
6813
        '%FA' => 'ú',
6814
        '%FB' => 'û',
6815
        '%FC' => 'ü',
6816
        '%FD' => 'ý',
6817
        '%FE' => 'þ',
6818
        '%FF' => 'ÿ',
6819
    );
6820
6821
    return $array;
6822
  }
6823
6824
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through to_utf8(); then the sequences listed in
   * self::$utf8ToWin1252 are replaced, and the result is handed to Xml::utf8_decode().
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // search / replace lists, derived once from self::$utf8ToWin1252
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    $str = self::to_utf8($str);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $str);

    return Xml::utf8_decode($mapped);
  }
6854
6855
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native encoding step, results that contain the byte 0xC2 additionally
   * get the sequences listed in self::$cp1252ToUtf8 replaced.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists, derived once from self::$cp1252ToUtf8
    static $search = null;
    static $replace = null;

    $str = \utf8_encode($str);

    // without a 0xC2 byte there is nothing to replace
    if (strpos($str, "\xC2") === false) {
      return $str;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
6881
6882
  /**
   * Repairs Windows-1252 characters inside an UTF-8 string.
   *
   * Meant for UTF-8 strings that were converted from Windows-1252 as if they
   * were ISO-8859-1 (i.e. the Windows-1252 chars from 80 to 9F were ignored).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to "UTF8::fix_simple_utf8()".
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str The string to fix.
   *
   * @return  string Whatever "UTF8::fix_simple_utf8()" returns for the input.
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6899
6900
  /**
   * Gives access to the table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array the content of "self::$whitespaceTable": all known whitespace characters as values and the
   *         type of whitespace as keys, as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6914
6915
  /**
   * Cuts a string down to a maximum number of words.
   *
   * A "word" is a run of non-whitespace characters. If the string is cut, the
   * shortened text is right-trimmed and "$strAddOn" is appended; a string that
   * already fits is returned unchanged.
   *
   * @param  string $str      The input string (it is cast to string first).
   * @param  int    $words    Maximum number of words to keep; values below 1 yield an empty string.
   * @param  string $strAddOn Suffix appended only when the string was shortened.
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $words = (int)$words;

    if ($words < 1) {
      return '';
    }

    // optional leading whitespace, then 1..$words words each followed by optional whitespace
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $found);

    // shorten only if something matched and the match does not span the whole string
    if (
        isset($found[0])
        &&
        self::strlen($str) !== self::strlen($found[0])
    ) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $str;
  }
6950
6951
  /**
   * Wraps a string to a given number of characters
   *
   * The string is split into the elements returned by "UTF8::split()" and a
   * one-byte-per-element mask is built from them: " " for a space, "?" for
   * anything else and "#" for every "$break" already present in the input.
   * The native "wordwrap()" is applied to that mask, and the "#" positions of
   * the wrapped mask are then mapped back onto the original elements.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>
   *                      The input string.
   *                      </p>
   * @param int    $width [optional] <p>
   *                      The column width.
   *                      </p>
   * @param string $break [optional] <p>
   *                      The string that is used to break the lines.
   *                      </p>
   * @param bool   $cut   [optional] <p>
   *                      If set to true, the string is always wrapped at or before
   *                      the specified width, so a word that is longer than the
   *                      given width is broken apart.
   *                      </p>
   *
   * @return string the given string wrapped at the specified column; an empty string
   *                if "$str" or "$break" is empty.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ('' === $str || '' === $break) {
      return '';
    }

    $segments = explode($break, $str);

    if (1 === count($segments) && '' === $segments[0]) {
      return '';
    }

    // build the element list and its mask side by side
    $mask = '';
    $chars = array();
    foreach ($segments as $index => $segment) {

      // every segment but the first was preceded by a break in the input
      if ($index) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= (' ' === $char) ? ' ' : '?';
      }
    }

    $wrappedMask = wordwrap($mask, $width, '#', $cut);

    $result = '';
    $charIndex = 0;   // position inside $chars
    $maskIndex = -1;  // position inside $wrappedMask
    $breakPos = -1;   // position of the last "#" found in $wrappedMask

    while (false !== ($breakPos = self::strpos($wrappedMask, '#', $breakPos + 1))) {

      // copy everything that sits in front of this break
      ++$maskIndex;
      while ($maskIndex < $breakPos) {
        $result .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskIndex;
      }

      // the "#" took the place of an original break or of a space: drop that element,
      // otherwise the "#" was inserted in the middle of a word and nothing is dropped
      if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $result .= $break;
    }

    // whatever is left belongs to the last line
    return $result . implode('', $chars);
  }
7030
7031
  /**
   * Gives access to the list of Unicode White Space characters.
   *
   * @return   array The content of "self::$whitespace": numeric code point as key and White Space Character
   *                 as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7040
7041
}
7042