Completed
Push — master ( c052f8...7a68f8 )
by Lars
22:11 queued 05:25
created

UTF8::is_html()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 19
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 3.0123

Importance

Changes 3
Bugs 2 Features 2
Metric Value
c 3
b 2
f 2
dl 0
loc 19
ccs 8
cts 9
cp 0.8889
rs 9.4285
cc 3
eloc 10
nc 3
nop 1
crap 3.0123
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  protected static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
93
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
94
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
96
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  protected static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    //HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  protected static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  protected static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
232
   * @var array
233
   */
234
  protected static $brokenUtf8ToUtf8 = array(
235
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
236
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
237
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
238
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
239
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
240
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
241
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
242
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
243
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
244
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
245
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
246
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
247
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
248
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
249
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
250
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
251
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
252
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
253
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
254
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
255
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
256
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
257
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
258
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
259
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
260
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
261
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
262
      'ü'       => 'ü',
263
      'ä'       => 'ä',
264
      'ö'       => 'ö',
265
      'Ö'       => 'Ö',
266
      'ß'       => 'ß',
267
      'Ã '       => 'à',
268
      'á'       => 'á',
269
      'â'       => 'â',
270
      'ã'       => 'ã',
271
      'ù'       => 'ù',
272
      'ú'       => 'ú',
273
      'û'       => 'û',
274
      'Ù'       => 'Ù',
275
      'Ú'       => 'Ú',
276
      'Û'       => 'Û',
277
      'Ü'       => 'Ü',
278
      'ò'       => 'ò',
279
      'ó'       => 'ó',
280
      'ô'       => 'ô',
281
      'è'       => 'è',
282
      'é'       => 'é',
283
      'ê'       => 'ê',
284
      'ë'       => 'ë',
285
      'À'       => 'À',
286
      'Á'       => 'Á',
287
      'Â'       => 'Â',
288
      'Ã'       => 'Ã',
289
      'Ä'       => 'Ä',
290
      'Ã…'       => 'Å',
291
      'Ç'       => 'Ç',
292
      'È'       => 'È',
293
      'É'       => 'É',
294
      'Ê'       => 'Ê',
295
      'Ë'       => 'Ë',
296
      'ÃŒ'       => 'Ì',
297
      'Í'       => 'Í',
298
      'ÃŽ'       => 'Î',
299
      'Ï'       => 'Ï',
300
      'Ñ'       => 'Ñ',
301
      'Ã’'       => 'Ò',
302
      'Ó'       => 'Ó',
303
      'Ô'       => 'Ô',
304
      'Õ'       => 'Õ',
305
      'Ø'       => 'Ø',
306
      'Ã¥'       => 'å',
307
      'æ'       => 'æ',
308
      'ç'       => 'ç',
309
      'ì'       => 'ì',
310
      'í'       => 'í',
311
      'î'       => 'î',
312
      'ï'       => 'ï',
313
      'ð'       => 'ð',
314
      'ñ'       => 'ñ',
315
      'õ'       => 'õ',
316
      'ø'       => 'ø',
317
      'ý'       => 'ý',
318
      'ÿ'       => 'ÿ',
319
      '€'      => '€',
320
  );
321
322
  /**
323
   * @var array
324
   */
325
  protected static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  protected static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  protected static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792
      'ISO-IR-230',
793
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803
804
  /**
805
   * __construct()
806
   */
807 1
  public function __construct()
  {
    // Creating an instance only runs the static support check;
    // nothing is stored on the instance itself.
    self::checkForSupport();
  }
811
812
  /**
813
   * Return the character at the specified position: $str[1] like functionality.
814
   *
815
   * @param    string $str A UTF-8 string.
816
   * @param    int    $pos The position of character to return.
817
   *
818
   * @return   string Single Multi-Byte character.
819
   */
820 2
  public static function access($str, $pos)
  {
    // A single character is simply the substring of length one at $pos.
    $length = 1;
    $character = self::substr($str, $pos, $length);

    return $character;
  }
824
825
  /**
826
   * Prepends UTF-8 BOM character to the string and returns the whole string.
827
   *
828
   * INFO: If the string already has a BOM, the input string is returned unchanged.
829
   *
830
   * @param    string $str The input string
831
   *
832
   * @return   string The output string that contains BOM
833
   */
834
  public static function add_bom_to_string($str)
  {
    // Guard clause: anything other than a strict `false` from the BOM check
    // means the input is handed back untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    // No BOM reported: prefix the string with the UTF-8 BOM.
    return self::bom() . $str;
  }
842
843
  /**
844
   * Convert binary into a string.
845
   *
846
   * @param mixed $bin 1|0
847
   *
848
   * @return string
849
   */
850 1
  public static function binary_to_str($bin)
  {
    // Converts a string of binary digits ("0" / "1") into the bytes it encodes.
    //
    // The digits are translated to hexadecimal four bits at a time instead of
    // going through base_convert(), which is documented to lose precision on
    // large numbers and therefore corrupted longer inputs.
    //
    // Results for short inputs are the same as before:
    //  - characters other than "0" and "1" are ignored,
    //  - leading zero bits are dropped (an empty or all-zero input gives "\x00"),
    //  - the hex digits are handed to pack('H*', ...) as they are, even when
    //    their count is odd.

    $bits = preg_replace('/[^01]/', '', (string)$bin);
    $bits = ltrim((string)$bits, '0');
    if ($bits === '') {
      $bits = '0';
    }

    // Left-pad to a whole number of nibbles so every chunk is exactly 4 bits.
    $missing = (4 - (strlen($bits) % 4)) % 4;
    $bits = str_repeat('0', $missing) . $bits;

    $hex = '';
    foreach (str_split($bits, 4) as $nibble) {
      $hex .= dechex(bindec($nibble));
    }

    return pack('H*', $hex);
  }
854
855
  /**
856
   * Returns the UTF-8 Byte Order Mark Character.
857
   *
858
   * @return string UTF-8 Byte Order Mark
859
   */
860 1
  public static function bom()
  {
    // The three bytes EF BB BF form the UTF-8 byte order mark.
    $utf8Bom = "\xEF\xBB\xBF";

    return $utf8Bom;
  }
864
865
  /**
866
   * @alias of UTF8::chr_map()
867
   * @see   UTF8::chr_map()
868
   *
869
   * @param string|array $callback
870
   * @param string       $str
871
   *
872
   * @return array
873
   */
874 1
  public static function callback($callback, $str)
  {
    // Pure alias: both arguments are forwarded unchanged to chr_map().
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
878
879
  /**
880
   * Returns an array of all lower and upper case UTF-8 encoded characters.
881
   *
882
   * @return   array An array with lower case chars as keys and upper chars as values.
883
   */
884
  protected static function case_table()
885
  {
886
    static $case = array(
887
888
      // lower => upper
889
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
890
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
891
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
892
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
893
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
894
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
895
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
896
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
897
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
898
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
899
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
900
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
901
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
902
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
903
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
904
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
905
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
906
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
907
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
908
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
909
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
910
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
911
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
912
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
913
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
914
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
915
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
916
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
917
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
918
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
919
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
920
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
921
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
922
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
923
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
924
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
925
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
926
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
927
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
928
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
929
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
930
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
931
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
932
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
933
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
934
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
935
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
936
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
937
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
938
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
939
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
940
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
941
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
942
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
943
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
944
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
945
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
946
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
947
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
948
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
949
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
950
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
951
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
952
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
953
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
954
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
955
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
956
      "\xea\x9e\x87"     => "\xea\x9e\x86",
957
      "\xea\x9e\x85"     => "\xea\x9e\x84",
958
      "\xea\x9e\x83"     => "\xea\x9e\x82",
959
      "\xea\x9e\x81"     => "\xea\x9e\x80",
960
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
961
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
962
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
963
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
964
      "\xea\x9d\xad"     => "\xea\x9d\xac",
965
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
966
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
967
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
968
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
969
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
970
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
971
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
972
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
973
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
974
      "\xea\x9d\x99"     => "\xea\x9d\x98",
975
      "\xea\x9d\x97"     => "\xea\x9d\x96",
976
      "\xea\x9d\x95"     => "\xea\x9d\x94",
977
      "\xea\x9d\x93"     => "\xea\x9d\x92",
978
      "\xea\x9d\x91"     => "\xea\x9d\x90",
979
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
980
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
981
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
982
      "\xea\x9d\x89"     => "\xea\x9d\x88",
983
      "\xea\x9d\x87"     => "\xea\x9d\x86",
984
      "\xea\x9d\x85"     => "\xea\x9d\x84",
985
      "\xea\x9d\x83"     => "\xea\x9d\x82",
986
      "\xea\x9d\x81"     => "\xea\x9d\x80",
987
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
988
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
989
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
990
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
991
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
992
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
993
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
994
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
995
      "\xea\x9c\xad"     => "\xea\x9c\xac",
996
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
997
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
998
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
999
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
1000
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
1001
      "\xea\x9a\x97"     => "\xea\x9a\x96",
1002
      "\xea\x9a\x95"     => "\xea\x9a\x94",
1003
      "\xea\x9a\x93"     => "\xea\x9a\x92",
1004
      "\xea\x9a\x91"     => "\xea\x9a\x90",
1005
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
1006
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
1007
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
1008
      "\xea\x9a\x89"     => "\xea\x9a\x88",
1009
      "\xea\x9a\x87"     => "\xea\x9a\x86",
1010
      "\xea\x9a\x85"     => "\xea\x9a\x84",
1011
      "\xea\x9a\x83"     => "\xea\x9a\x82",
1012
      "\xea\x9a\x81"     => "\xea\x9a\x80",
1013
      "\xea\x99\xad"     => "\xea\x99\xac",
1014
      "\xea\x99\xab"     => "\xea\x99\xaa",
1015
      "\xea\x99\xa9"     => "\xea\x99\xa8",
1016
      "\xea\x99\xa7"     => "\xea\x99\xa6",
1017
      "\xea\x99\xa5"     => "\xea\x99\xa4",
1018
      "\xea\x99\xa3"     => "\xea\x99\xa2",
1019
      "\xea\x99\x9f"     => "\xea\x99\x9e",
1020
      "\xea\x99\x9d"     => "\xea\x99\x9c",
1021
      "\xea\x99\x9b"     => "\xea\x99\x9a",
1022
      "\xea\x99\x99"     => "\xea\x99\x98",
1023
      "\xea\x99\x97"     => "\xea\x99\x96",
1024
      "\xea\x99\x95"     => "\xea\x99\x94",
1025
      "\xea\x99\x93"     => "\xea\x99\x92",
1026
      "\xea\x99\x91"     => "\xea\x99\x90",
1027
      "\xea\x99\x8f"     => "\xea\x99\x8e",
1028
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1029
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1030
      "\xea\x99\x89"     => "\xea\x99\x88",
1031
      "\xea\x99\x87"     => "\xea\x99\x86",
1032
      "\xea\x99\x85"     => "\xea\x99\x84",
1033
      "\xea\x99\x83"     => "\xea\x99\x82",
1034
      "\xea\x99\x81"     => "\xea\x99\x80",
1035
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1036
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1037
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1038
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1039
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1040
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1041
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1042
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1043
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1044
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1045
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1046
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1047
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1048
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1049
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1050
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1051
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1052
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1053
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1054
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1055
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1056
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1057
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1058
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1059
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1060
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1061
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1062
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1063
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1064
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1065
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1066
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1067
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1068
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1069
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1070
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1071
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1072
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1073
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1074
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1075
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1076
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1077
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1078
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1079
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1080
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1081
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1082
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1083
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1084
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1085
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1086
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1087
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1088
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1089
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1090
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1091
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1092
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1093
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1094
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1095
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1096
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1097
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1098
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1099
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1100
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1101
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1102
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1103
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1104
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1105
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1106
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1107
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1108
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1109
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1110
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1111
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1112
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1113
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1114
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1115
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1116
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1117
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1118
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1119
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1120
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1121
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1122
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1123
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1124
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1125
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1126
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1127
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1128
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1129
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1130
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1131
      "\xe2\xb1\xa5"     => "\xc8\xba",
1132
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1133
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1134
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1135
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1136
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1137
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1138
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1139
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1140
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1141
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1142
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1143
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1144
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1145
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1146
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1147
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1148
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1149
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1150
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1151
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1152
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1153
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1154
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1155
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1156
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1157
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1158
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1159
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1160
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1161
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1162
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1163
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1164
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1165
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1166
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1167
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1168
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1169
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1170
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1171
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1172
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1173
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1174
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1175
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1176
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1177
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1178
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1179
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1180
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1181
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1182
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1183
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1184
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1185
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1186
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1187
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1188
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1189
      "\xe1\xbe\xbe"     => "\xce\x99",
1190
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1191
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1192
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1193
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1194
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1195
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1196
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1197
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1198
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1199
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1200
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1201
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1202
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1203
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1204
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1205
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1206
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1207
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1208
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1209
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1210
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1211
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1212
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1213
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1214
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1215
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1216
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1217
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1218
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1219
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1220
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1221
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1222
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1223
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1224
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1225
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1226
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1227
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1228
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1229
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1230
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1231
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1232
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1233
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1234
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1235
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1236
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1237
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1238
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1239
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1240
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1241
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1242
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1243
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1244
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1245
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1246
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1247
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1248
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1249
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1250
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1251
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1252
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1253
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1254
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1255
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1256
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1257
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1258
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1259
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1260
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1261
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1262
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1263
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1264
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1265
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1266
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1267
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1268
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1269
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1270
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1271
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1272
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1273
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1274
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1275
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1276
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1277
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1278
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1279
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1280
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1281
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1282
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1283
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1284
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1285
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1286
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1287
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1288
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1289
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1290
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1291
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1292
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1293
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1294
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1295
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1296
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1297
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1298
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1299
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1300
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1301
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1302
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1303
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1304
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1305
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1306
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1307
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1308
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1309
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1310
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1311
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1312
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1313
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1314
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1315
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1316
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1317
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1318
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1319
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1320
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1321
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1322
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1323
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1324
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1325
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1326
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1327
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1328
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1329
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1330
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1331
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1332
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1333
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1334
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1335
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1336
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1337
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1338
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1339
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1340
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1341
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1342
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1343
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1344
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1345
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1346
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1347
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1348
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1349
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1350
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1351
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1352
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1353
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1354
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1355
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1356
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1357
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1358
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1359
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1360
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1361
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1362
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1363
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1364
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1365
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1366
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1367
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1368
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1369
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1370
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1371
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1372
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1373
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1374
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1375
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1376
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1377
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1378
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1379
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1380
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1381
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1382
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1383
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1384
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1385
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1386
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1387
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1388
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1389
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1390
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1391
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1392
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1393
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1394
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1395
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1396
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1397
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1398
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1399
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1400
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1401
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1402
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1403
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1404
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1405
      "\xd6\x86"         => "\xd5\x96",
1406
      "\xd6\x85"         => "\xd5\x95",
1407
      "\xd6\x84"         => "\xd5\x94",
1408
      "\xd6\x83"         => "\xd5\x93",
1409
      "\xd6\x82"         => "\xd5\x92",
1410
      "\xd6\x81"         => "\xd5\x91",
1411
      "\xd6\x80"         => "\xd5\x90",
1412
      "\xd5\xbf"         => "\xd5\x8f",
1413
      "\xd5\xbe"         => "\xd5\x8e",
1414
      "\xd5\xbd"         => "\xd5\x8d",
1415
      "\xd5\xbc"         => "\xd5\x8c",
1416
      "\xd5\xbb"         => "\xd5\x8b",
1417
      "\xd5\xba"         => "\xd5\x8a",
1418
      "\xd5\xb9"         => "\xd5\x89",
1419
      "\xd5\xb8"         => "\xd5\x88",
1420
      "\xd5\xb7"         => "\xd5\x87",
1421
      "\xd5\xb6"         => "\xd5\x86",
1422
      "\xd5\xb5"         => "\xd5\x85",
1423
      "\xd5\xb4"         => "\xd5\x84",
1424
      "\xd5\xb3"         => "\xd5\x83",
1425
      "\xd5\xb2"         => "\xd5\x82",
1426
      "\xd5\xb1"         => "\xd5\x81",
1427
      "\xd5\xb0"         => "\xd5\x80",
1428
      "\xd5\xaf"         => "\xd4\xbf",
1429
      "\xd5\xae"         => "\xd4\xbe",
1430
      "\xd5\xad"         => "\xd4\xbd",
1431
      "\xd5\xac"         => "\xd4\xbc",
1432
      "\xd5\xab"         => "\xd4\xbb",
1433
      "\xd5\xaa"         => "\xd4\xba",
1434
      "\xd5\xa9"         => "\xd4\xb9",
1435
      "\xd5\xa8"         => "\xd4\xb8",
1436
      "\xd5\xa7"         => "\xd4\xb7",
1437
      "\xd5\xa6"         => "\xd4\xb6",
1438
      "\xd5\xa5"         => "\xd4\xb5",
1439
      "\xd5\xa4"         => "\xd4\xb4",
1440
      "\xd5\xa3"         => "\xd4\xb3",
1441
      "\xd5\xa2"         => "\xd4\xb2",
1442
      "\xd5\xa1"         => "\xd4\xb1",
1443
      "\xd4\xa5"         => "\xd4\xa4",
1444
      "\xd4\xa3"         => "\xd4\xa2",
1445
      "\xd4\xa1"         => "\xd4\xa0",
1446
      "\xd4\x9f"         => "\xd4\x9e",
1447
      "\xd4\x9d"         => "\xd4\x9c",
1448
      "\xd4\x9b"         => "\xd4\x9a",
1449
      "\xd4\x99"         => "\xd4\x98",
1450
      "\xd4\x97"         => "\xd4\x96",
1451
      "\xd4\x95"         => "\xd4\x94",
1452
      "\xd4\x93"         => "\xd4\x92",
1453
      "\xd4\x91"         => "\xd4\x90",
1454
      "\xd4\x8f"         => "\xd4\x8e",
1455
      "\xd4\x8d"         => "\xd4\x8c",
1456
      "\xd4\x8b"         => "\xd4\x8a",
1457
      "\xd4\x89"         => "\xd4\x88",
1458
      "\xd4\x87"         => "\xd4\x86",
1459
      "\xd4\x85"         => "\xd4\x84",
1460
      "\xd4\x83"         => "\xd4\x82",
1461
      "\xd4\x81"         => "\xd4\x80",
1462
      "\xd3\xbf"         => "\xd3\xbe",
1463
      "\xd3\xbd"         => "\xd3\xbc",
1464
      "\xd3\xbb"         => "\xd3\xba",
1465
      "\xd3\xb9"         => "\xd3\xb8",
1466
      "\xd3\xb7"         => "\xd3\xb6",
1467
      "\xd3\xb5"         => "\xd3\xb4",
1468
      "\xd3\xb3"         => "\xd3\xb2",
1469
      "\xd3\xb1"         => "\xd3\xb0",
1470
      "\xd3\xaf"         => "\xd3\xae",
1471
      "\xd3\xad"         => "\xd3\xac",
1472
      "\xd3\xab"         => "\xd3\xaa",
1473
      "\xd3\xa9"         => "\xd3\xa8",
1474
      "\xd3\xa7"         => "\xd3\xa6",
1475
      "\xd3\xa5"         => "\xd3\xa4",
1476
      "\xd3\xa3"         => "\xd3\xa2",
1477
      "\xd3\xa1"         => "\xd3\xa0",
1478
      "\xd3\x9f"         => "\xd3\x9e",
1479
      "\xd3\x9d"         => "\xd3\x9c",
1480
      "\xd3\x9b"         => "\xd3\x9a",
1481
      "\xd3\x99"         => "\xd3\x98",
1482
      "\xd3\x97"         => "\xd3\x96",
1483
      "\xd3\x95"         => "\xd3\x94",
1484
      "\xd3\x93"         => "\xd3\x92",
1485
      "\xd3\x91"         => "\xd3\x90",
1486
      "\xd3\x8f"         => "\xd3\x80",
1487
      "\xd3\x8e"         => "\xd3\x8d",
1488
      "\xd3\x8c"         => "\xd3\x8b",
1489
      "\xd3\x8a"         => "\xd3\x89",
1490
      "\xd3\x88"         => "\xd3\x87",
1491
      "\xd3\x86"         => "\xd3\x85",
1492
      "\xd3\x84"         => "\xd3\x83",
1493
      "\xd3\x82"         => "\xd3\x81",
1494
      "\xd2\xbf"         => "\xd2\xbe",
1495
      "\xd2\xbd"         => "\xd2\xbc",
1496
      "\xd2\xbb"         => "\xd2\xba",
1497
      "\xd2\xb9"         => "\xd2\xb8",
1498
      "\xd2\xb7"         => "\xd2\xb6",
1499
      "\xd2\xb5"         => "\xd2\xb4",
1500
      "\xd2\xb3"         => "\xd2\xb2",
1501
      "\xd2\xb1"         => "\xd2\xb0",
1502
      "\xd2\xaf"         => "\xd2\xae",
1503
      "\xd2\xad"         => "\xd2\xac",
1504
      "\xd2\xab"         => "\xd2\xaa",
1505
      "\xd2\xa9"         => "\xd2\xa8",
1506
      "\xd2\xa7"         => "\xd2\xa6",
1507
      "\xd2\xa5"         => "\xd2\xa4",
1508
      "\xd2\xa3"         => "\xd2\xa2",
1509
      "\xd2\xa1"         => "\xd2\xa0",
1510
      "\xd2\x9f"         => "\xd2\x9e",
1511
      "\xd2\x9d"         => "\xd2\x9c",
1512
      "\xd2\x9b"         => "\xd2\x9a",
1513
      "\xd2\x99"         => "\xd2\x98",
1514
      "\xd2\x97"         => "\xd2\x96",
1515
      "\xd2\x95"         => "\xd2\x94",
1516
      "\xd2\x93"         => "\xd2\x92",
1517
      "\xd2\x91"         => "\xd2\x90",
1518
      "\xd2\x8f"         => "\xd2\x8e",
1519
      "\xd2\x8d"         => "\xd2\x8c",
1520
      "\xd2\x8b"         => "\xd2\x8a",
1521
      "\xd2\x81"         => "\xd2\x80",
1522
      "\xd1\xbf"         => "\xd1\xbe",
1523
      "\xd1\xbd"         => "\xd1\xbc",
1524
      "\xd1\xbb"         => "\xd1\xba",
1525
      "\xd1\xb9"         => "\xd1\xb8",
1526
      "\xd1\xb7"         => "\xd1\xb6",
1527
      "\xd1\xb5"         => "\xd1\xb4",
1528
      "\xd1\xb3"         => "\xd1\xb2",
1529
      "\xd1\xb1"         => "\xd1\xb0",
1530
      "\xd1\xaf"         => "\xd1\xae",
1531
      "\xd1\xad"         => "\xd1\xac",
1532
      "\xd1\xab"         => "\xd1\xaa",
1533
      "\xd1\xa9"         => "\xd1\xa8",
1534
      "\xd1\xa7"         => "\xd1\xa6",
1535
      "\xd1\xa5"         => "\xd1\xa4",
1536
      "\xd1\xa3"         => "\xd1\xa2",
1537
      "\xd1\xa1"         => "\xd1\xa0",
1538
      "\xd1\x9f"         => "\xd0\x8f",
1539
      "\xd1\x9e"         => "\xd0\x8e",
1540
      "\xd1\x9d"         => "\xd0\x8d",
1541
      "\xd1\x9c"         => "\xd0\x8c",
1542
      "\xd1\x9b"         => "\xd0\x8b",
1543
      "\xd1\x9a"         => "\xd0\x8a",
1544
      "\xd1\x99"         => "\xd0\x89",
1545
      "\xd1\x98"         => "\xd0\x88",
1546
      "\xd1\x97"         => "\xd0\x87",
1547
      "\xd1\x96"         => "\xd0\x86",
1548
      "\xd1\x95"         => "\xd0\x85",
1549
      "\xd1\x94"         => "\xd0\x84",
1550
      "\xd1\x93"         => "\xd0\x83",
1551
      "\xd1\x92"         => "\xd0\x82",
1552
      "\xd1\x91"         => "\xd0\x81",
1553
      "\xd1\x90"         => "\xd0\x80",
1554
      "\xd1\x8f"         => "\xd0\xaf",
1555
      "\xd1\x8e"         => "\xd0\xae",
1556
      "\xd1\x8d"         => "\xd0\xad",
1557
      "\xd1\x8c"         => "\xd0\xac",
1558
      "\xd1\x8b"         => "\xd0\xab",
1559
      "\xd1\x8a"         => "\xd0\xaa",
1560
      "\xd1\x89"         => "\xd0\xa9",
1561
      "\xd1\x88"         => "\xd0\xa8",
1562
      "\xd1\x87"         => "\xd0\xa7",
1563
      "\xd1\x86"         => "\xd0\xa6",
1564
      "\xd1\x85"         => "\xd0\xa5",
1565
      "\xd1\x84"         => "\xd0\xa4",
1566
      "\xd1\x83"         => "\xd0\xa3",
1567
      "\xd1\x82"         => "\xd0\xa2",
1568
      "\xd1\x81"         => "\xd0\xa1",
1569
      "\xd1\x80"         => "\xd0\xa0",
1570
      "\xd0\xbf"         => "\xd0\x9f",
1571
      "\xd0\xbe"         => "\xd0\x9e",
1572
      "\xd0\xbd"         => "\xd0\x9d",
1573
      "\xd0\xbc"         => "\xd0\x9c",
1574
      "\xd0\xbb"         => "\xd0\x9b",
1575
      "\xd0\xba"         => "\xd0\x9a",
1576
      "\xd0\xb9"         => "\xd0\x99",
1577
      "\xd0\xb8"         => "\xd0\x98",
1578
      "\xd0\xb7"         => "\xd0\x97",
1579
      "\xd0\xb6"         => "\xd0\x96",
1580
      "\xd0\xb5"         => "\xd0\x95",
1581
      "\xd0\xb4"         => "\xd0\x94",
1582
      "\xd0\xb3"         => "\xd0\x93",
1583
      "\xd0\xb2"         => "\xd0\x92",
1584
      "\xd0\xb1"         => "\xd0\x91",
1585
      "\xd0\xb0"         => "\xd0\x90",
1586
      "\xcf\xbb"         => "\xcf\xba",
1587
      "\xcf\xb8"         => "\xcf\xb7",
1588
      "\xcf\xb5"         => "\xce\x95",
1589
      "\xcf\xb2"         => "\xcf\xb9",
1590
      "\xcf\xb1"         => "\xce\xa1",
1591
      "\xcf\xb0"         => "\xce\x9a",
1592
      "\xcf\xaf"         => "\xcf\xae",
1593
      "\xcf\xad"         => "\xcf\xac",
1594
      "\xcf\xab"         => "\xcf\xaa",
1595
      "\xcf\xa9"         => "\xcf\xa8",
1596
      "\xcf\xa7"         => "\xcf\xa6",
1597
      "\xcf\xa5"         => "\xcf\xa4",
1598
      "\xcf\xa3"         => "\xcf\xa2",
1599
      "\xcf\xa1"         => "\xcf\xa0",
1600
      "\xcf\x9f"         => "\xcf\x9e",
1601
      "\xcf\x9d"         => "\xcf\x9c",
1602
      "\xcf\x9b"         => "\xcf\x9a",
1603
      "\xcf\x99"         => "\xcf\x98",
1604
      "\xcf\x97"         => "\xcf\x8f",
1605
      "\xcf\x96"         => "\xce\xa0",
1606
      "\xcf\x95"         => "\xce\xa6",
1607
      "\xcf\x91"         => "\xce\x98",
1608
      "\xcf\x90"         => "\xce\x92",
1609
      "\xcf\x8e"         => "\xce\x8f",
1610
      "\xcf\x8d"         => "\xce\x8e",
1611
      "\xcf\x8c"         => "\xce\x8c",
1612
      "\xcf\x8b"         => "\xce\xab",
1613
      "\xcf\x8a"         => "\xce\xaa",
1614
      "\xcf\x89"         => "\xce\xa9",
1615
      "\xcf\x88"         => "\xce\xa8",
1616
      "\xcf\x87"         => "\xce\xa7",
1617
      "\xcf\x86"         => "\xce\xa6",
1618
      "\xcf\x85"         => "\xce\xa5",
1619
      "\xcf\x84"         => "\xce\xa4",
1620
      "\xcf\x83"         => "\xce\xa3",
1621
      "\xcf\x82"         => "\xce\xa3",
1622
      "\xcf\x81"         => "\xce\xa1",
1623
      "\xcf\x80"         => "\xce\xa0",
1624
      "\xce\xbf"         => "\xce\x9f",
1625
      "\xce\xbe"         => "\xce\x9e",
1626
      "\xce\xbd"         => "\xce\x9d",
1627
      "\xce\xbc"         => "\xce\x9c",
1628
      "\xce\xbb"         => "\xce\x9b",
1629
      "\xce\xba"         => "\xce\x9a",
1630
      "\xce\xb9"         => "\xce\x99",
1631
      "\xce\xb8"         => "\xce\x98",
1632
      "\xce\xb7"         => "\xce\x97",
1633
      "\xce\xb6"         => "\xce\x96",
1634
      "\xce\xb5"         => "\xce\x95",
1635
      "\xce\xb4"         => "\xce\x94",
1636
      "\xce\xb3"         => "\xce\x93",
1637
      "\xce\xb2"         => "\xce\x92",
1638
      "\xce\xb1"         => "\xce\x91",
1639
      "\xce\xaf"         => "\xce\x8a",
1640
      "\xce\xae"         => "\xce\x89",
1641
      "\xce\xad"         => "\xce\x88",
1642
      "\xce\xac"         => "\xce\x86",
1643
      "\xcd\xbd"         => "\xcf\xbf",
1644
      "\xcd\xbc"         => "\xcf\xbe",
1645
      "\xcd\xbb"         => "\xcf\xbd",
1646
      "\xcd\xb7"         => "\xcd\xb6",
1647
      "\xcd\xb3"         => "\xcd\xb2",
1648
      "\xcd\xb1"         => "\xcd\xb0",
1649
      "\xca\x92"         => "\xc6\xb7",
1650
      "\xca\x8c"         => "\xc9\x85",
1651
      "\xca\x8b"         => "\xc6\xb2",
1652
      "\xca\x8a"         => "\xc6\xb1",
1653
      "\xca\x89"         => "\xc9\x84",
1654
      "\xca\x88"         => "\xc6\xae",
1655
      "\xca\x83"         => "\xc6\xa9",
1656
      "\xca\x80"         => "\xc6\xa6",
1657
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1658
      "\xc9\xb5"         => "\xc6\x9f",
1659
      "\xc9\xb2"         => "\xc6\x9d",
1660
      "\xc9\xb1"         => "\xe2\xb1\xae",
1661
      "\xc9\xaf"         => "\xc6\x9c",
1662
      "\xc9\xab"         => "\xe2\xb1\xa2",
1663
      "\xc9\xa9"         => "\xc6\x96",
1664
      "\xc9\xa8"         => "\xc6\x97",
1665
      "\xc9\xa5"         => "\xea\x9e\x8d",
1666
      "\xc9\xa3"         => "\xc6\x94",
1667
      "\xc9\xa0"         => "\xc6\x93",
1668
      "\xc9\x9b"         => "\xc6\x90",
1669
      "\xc9\x99"         => "\xc6\x8f",
1670
      "\xc9\x97"         => "\xc6\x8a",
1671
      "\xc9\x96"         => "\xc6\x89",
1672
      "\xc9\x94"         => "\xc6\x86",
1673
      "\xc9\x93"         => "\xc6\x81",
1674
      "\xc9\x92"         => "\xe2\xb1\xb0",
1675
      "\xc9\x91"         => "\xe2\xb1\xad",
1676
      "\xc9\x90"         => "\xe2\xb1\xaf",
1677
      "\xc9\x8f"         => "\xc9\x8e",
1678
      "\xc9\x8d"         => "\xc9\x8c",
1679
      "\xc9\x8b"         => "\xc9\x8a",
1680
      "\xc9\x89"         => "\xc9\x88",
1681
      "\xc9\x87"         => "\xc9\x86",
1682
      "\xc9\x82"         => "\xc9\x81",
1683
      "\xc9\x80"         => "\xe2\xb1\xbf",
1684
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1685
      "\xc8\xbc"         => "\xc8\xbb",
1686
      "\xc8\xb3"         => "\xc8\xb2",
1687
      "\xc8\xb1"         => "\xc8\xb0",
1688
      "\xc8\xaf"         => "\xc8\xae",
1689
      "\xc8\xad"         => "\xc8\xac",
1690
      "\xc8\xab"         => "\xc8\xaa",
1691
      "\xc8\xa9"         => "\xc8\xa8",
1692
      "\xc8\xa7"         => "\xc8\xa6",
1693
      "\xc8\xa5"         => "\xc8\xa4",
1694
      "\xc8\xa3"         => "\xc8\xa2",
1695
      "\xc8\x9f"         => "\xc8\x9e",
1696
      "\xc8\x9d"         => "\xc8\x9c",
1697
      "\xc8\x9b"         => "\xc8\x9a",
1698
      "\xc8\x99"         => "\xc8\x98",
1699
      "\xc8\x97"         => "\xc8\x96",
1700
      "\xc8\x95"         => "\xc8\x94",
1701
      "\xc8\x93"         => "\xc8\x92",
1702
      "\xc8\x91"         => "\xc8\x90",
1703
      "\xc8\x8f"         => "\xc8\x8e",
1704
      "\xc8\x8d"         => "\xc8\x8c",
1705
      "\xc8\x8b"         => "\xc8\x8a",
1706
      "\xc8\x89"         => "\xc8\x88",
1707
      "\xc8\x87"         => "\xc8\x86",
1708
      "\xc8\x85"         => "\xc8\x84",
1709
      "\xc8\x83"         => "\xc8\x82",
1710
      "\xc8\x81"         => "\xc8\x80",
1711
      "\xc7\xbf"         => "\xc7\xbe",
1712
      "\xc7\xbd"         => "\xc7\xbc",
1713
      "\xc7\xbb"         => "\xc7\xba",
1714
      "\xc7\xb9"         => "\xc7\xb8",
1715
      "\xc7\xb5"         => "\xc7\xb4",
1716
      "\xc7\xb3"         => "\xc7\xb2",
1717
      "\xc7\xaf"         => "\xc7\xae",
1718
      "\xc7\xad"         => "\xc7\xac",
1719
      "\xc7\xab"         => "\xc7\xaa",
1720
      "\xc7\xa9"         => "\xc7\xa8",
1721
      "\xc7\xa7"         => "\xc7\xa6",
1722
      "\xc7\xa5"         => "\xc7\xa4",
1723
      "\xc7\xa3"         => "\xc7\xa2",
1724
      "\xc7\xa1"         => "\xc7\xa0",
1725
      "\xc7\x9f"         => "\xc7\x9e",
1726
      "\xc7\x9d"         => "\xc6\x8e",
1727
      "\xc7\x9c"         => "\xc7\x9b",
1728
      "\xc7\x9a"         => "\xc7\x99",
1729
      "\xc7\x98"         => "\xc7\x97",
1730
      "\xc7\x96"         => "\xc7\x95",
1731
      "\xc7\x94"         => "\xc7\x93",
1732
      "\xc7\x92"         => "\xc7\x91",
1733
      "\xc7\x90"         => "\xc7\x8f",
1734
      "\xc7\x8e"         => "\xc7\x8d",
1735
      "\xc7\x8c"         => "\xc7\x8b",
1736
      "\xc7\x89"         => "\xc7\x88",
1737
      "\xc7\x86"         => "\xc7\x85",
1738
      "\xc6\xbf"         => "\xc7\xb7",
1739
      "\xc6\xbd"         => "\xc6\xbc",
1740
      "\xc6\xb9"         => "\xc6\xb8",
1741
      "\xc6\xb6"         => "\xc6\xb5",
1742
      "\xc6\xb4"         => "\xc6\xb3",
1743
      "\xc6\xb0"         => "\xc6\xaf",
1744
      "\xc6\xad"         => "\xc6\xac",
1745
      "\xc6\xa8"         => "\xc6\xa7",
1746
      "\xc6\xa5"         => "\xc6\xa4",
1747
      "\xc6\xa3"         => "\xc6\xa2",
1748
      "\xc6\xa1"         => "\xc6\xa0",
1749
      "\xc6\x9e"         => "\xc8\xa0",
1750
      "\xc6\x9a"         => "\xc8\xbd",
1751
      "\xc6\x99"         => "\xc6\x98",
1752
      "\xc6\x95"         => "\xc7\xb6",
1753
      "\xc6\x92"         => "\xc6\x91",
1754
      "\xc6\x8c"         => "\xc6\x8b",
1755
      "\xc6\x88"         => "\xc6\x87",
1756
      "\xc6\x85"         => "\xc6\x84",
1757
      "\xc6\x83"         => "\xc6\x82",
1758
      "\xc6\x80"         => "\xc9\x83",
1759
      "\xc5\xbf"         => "\x53",
1760
      "\xc5\xbe"         => "\xc5\xbd",
1761
      "\xc5\xbc"         => "\xc5\xbb",
1762
      "\xc5\xba"         => "\xc5\xb9",
1763
      "\xc5\xb7"         => "\xc5\xb6",
1764
      "\xc5\xb5"         => "\xc5\xb4",
1765
      "\xc5\xb3"         => "\xc5\xb2",
1766
      "\xc5\xb1"         => "\xc5\xb0",
1767
      "\xc5\xaf"         => "\xc5\xae",
1768
      "\xc5\xad"         => "\xc5\xac",
1769
      "\xc5\xab"         => "\xc5\xaa",
1770
      "\xc5\xa9"         => "\xc5\xa8",
1771
      "\xc5\xa7"         => "\xc5\xa6",
1772
      "\xc5\xa5"         => "\xc5\xa4",
1773
      "\xc5\xa3"         => "\xc5\xa2",
1774
      "\xc5\xa1"         => "\xc5\xa0",
1775
      "\xc5\x9f"         => "\xc5\x9e",
1776
      "\xc5\x9d"         => "\xc5\x9c",
1777
      "\xc5\x9b"         => "\xc5\x9a",
1778
      "\xc5\x99"         => "\xc5\x98",
1779
      "\xc5\x97"         => "\xc5\x96",
1780
      "\xc5\x95"         => "\xc5\x94",
1781
      "\xc5\x93"         => "\xc5\x92",
1782
      "\xc5\x91"         => "\xc5\x90",
1783
      "\xc5\x8f"         => "\xc5\x8e",
1784
      "\xc5\x8d"         => "\xc5\x8c",
1785
      "\xc5\x8b"         => "\xc5\x8a",
1786
      "\xc5\x88"         => "\xc5\x87",
1787
      "\xc5\x86"         => "\xc5\x85",
1788
      "\xc5\x84"         => "\xc5\x83",
1789
      "\xc5\x82"         => "\xc5\x81",
1790
      "\xc5\x80"         => "\xc4\xbf",
1791
      "\xc4\xbe"         => "\xc4\xbd",
1792
      "\xc4\xbc"         => "\xc4\xbb",
1793
      "\xc4\xba"         => "\xc4\xb9",
1794
      "\xc4\xb7"         => "\xc4\xb6",
1795
      "\xc4\xb5"         => "\xc4\xb4",
1796
      "\xc4\xb3"         => "\xc4\xb2",
1797
      "\xc4\xb1"         => "\x49",
1798
      "\xc4\xaf"         => "\xc4\xae",
1799
      "\xc4\xad"         => "\xc4\xac",
1800
      "\xc4\xab"         => "\xc4\xaa",
1801
      "\xc4\xa9"         => "\xc4\xa8",
1802
      "\xc4\xa7"         => "\xc4\xa6",
1803
      "\xc4\xa5"         => "\xc4\xa4",
1804
      "\xc4\xa3"         => "\xc4\xa2",
1805
      "\xc4\xa1"         => "\xc4\xa0",
1806
      "\xc4\x9f"         => "\xc4\x9e",
1807
      "\xc4\x9d"         => "\xc4\x9c",
1808
      "\xc4\x9b"         => "\xc4\x9a",
1809
      "\xc4\x99"         => "\xc4\x98",
1810
      "\xc4\x97"         => "\xc4\x96",
1811
      "\xc4\x95"         => "\xc4\x94",
1812
      "\xc4\x93"         => "\xc4\x92",
1813
      "\xc4\x91"         => "\xc4\x90",
1814
      "\xc4\x8f"         => "\xc4\x8e",
1815
      "\xc4\x8d"         => "\xc4\x8c",
1816
      "\xc4\x8b"         => "\xc4\x8a",
1817
      "\xc4\x89"         => "\xc4\x88",
1818
      "\xc4\x87"         => "\xc4\x86",
1819
      "\xc4\x85"         => "\xc4\x84",
1820
      "\xc4\x83"         => "\xc4\x82",
1821
      "\xc4\x81"         => "\xc4\x80",
1822
      "\xc3\xbf"         => "\xc5\xb8",
1823
      "\xc3\xbe"         => "\xc3\x9e",
1824
      "\xc3\xbd"         => "\xc3\x9d",
1825
      "\xc3\xbc"         => "\xc3\x9c",
1826
      "\xc3\xbb"         => "\xc3\x9b",
1827
      "\xc3\xba"         => "\xc3\x9a",
1828
      "\xc3\xb9"         => "\xc3\x99",
1829
      "\xc3\xb8"         => "\xc3\x98",
1830
      "\xc3\xb6"         => "\xc3\x96",
1831
      "\xc3\xb5"         => "\xc3\x95",
1832
      "\xc3\xb4"         => "\xc3\x94",
1833
      "\xc3\xb3"         => "\xc3\x93",
1834
      "\xc3\xb2"         => "\xc3\x92",
1835
      "\xc3\xb1"         => "\xc3\x91",
1836
      "\xc3\xb0"         => "\xc3\x90",
1837
      "\xc3\xaf"         => "\xc3\x8f",
1838
      "\xc3\xae"         => "\xc3\x8e",
1839
      "\xc3\xad"         => "\xc3\x8d",
1840
      "\xc3\xac"         => "\xc3\x8c",
1841
      "\xc3\xab"         => "\xc3\x8b",
1842
      "\xc3\xaa"         => "\xc3\x8a",
1843
      "\xc3\xa9"         => "\xc3\x89",
1844
      "\xc3\xa8"         => "\xc3\x88",
1845
      "\xc3\xa7"         => "\xc3\x87",
1846
      "\xc3\xa6"         => "\xc3\x86",
1847
      "\xc3\xa5"         => "\xc3\x85",
1848
      "\xc3\xa4"         => "\xc3\x84",
1849
      "\xc3\xa3"         => "\xc3\x83",
1850
      "\xc3\xa2"         => "\xc3\x82",
1851
      "\xc3\xa1"         => "\xc3\x81",
1852
      "\xc3\xa0"         => "\xc3\x80",
1853
      "\xc2\xb5"         => "\xce\x9c",
1854
      "\x7a"             => "\x5a",
1855
      "\x79"             => "\x59",
1856
      "\x78"             => "\x58",
1857
      "\x77"             => "\x57",
1858
      "\x76"             => "\x56",
1859
      "\x75"             => "\x55",
1860
      "\x74"             => "\x54",
1861
      "\x73"             => "\x53",
1862
      "\x72"             => "\x52",
1863
      "\x71"             => "\x51",
1864
      "\x70"             => "\x50",
1865
      "\x6f"             => "\x4f",
1866
      "\x6e"             => "\x4e",
1867
      "\x6d"             => "\x4d",
1868
      "\x6c"             => "\x4c",
1869
      "\x6b"             => "\x4b",
1870
      "\x6a"             => "\x4a",
1871
      "\x69"             => "\x49",
1872
      "\x68"             => "\x48",
1873
      "\x67"             => "\x47",
1874
      "\x66"             => "\x46",
1875
      "\x65"             => "\x45",
1876
      "\x64"             => "\x44",
1877
      "\x63"             => "\x43",
1878
      "\x62"             => "\x42",
1879
      "\x61"             => "\x41",
1880
1881
    );
1882
1883
    return $case;
1884
  }
1885
1886
  /**
   * Probes the server environment for the facilities the UTF-8 helpers rely on.
   *
   * The results are stored in the static self::$support array; as soon as its
   * "mbstring" entry is set, further calls return without doing any work.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // already probed -> keep the cached results
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1902
1903
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * @param    int|string $code_point The code point for which to generate a character.
   *                                  Anything that is not already an integer is handed
   *                                  to UTF8::hex_to_int() for conversion first.
   *
   * @return   string|null Multi-Byte character, returns null on failure to encode.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    // Normalize the input *before* choosing a back-end, so that non-integer
    // input is interpreted the same way with and without \IntlChar.
    $i = (int)$code_point;
    if ($i !== $code_point) {
      $i = self::hex_to_int($code_point);
    }

    // the input could not be turned into an integer code point
    if (!is_int($i)) {
      return null;
    }

    if (self::$support['intlChar'] === true) {
      return \IntlChar::chr($i);
    }

    // Fallback path: code point 0 has always been reported as a failure here;
    // the check is now explicit instead of a loose "!$i".
    if ($i === 0) {
      return null;
    }

    return self::html_entity_decode("&#{$i};", ENT_QUOTES);
  }
1932
1933
  /**
   * Runs a callback on every character of a string and collects the results.
   *
   * The string is broken into characters by UTF8::split(); the callback is then
   * applied to each of them via array_map().
   *
   * @param  string|array $callback The callback function.
   * @param  string       $str      UTF-8 string to run callback on.
   *
   * @return array The outcome of callback, one entry per character.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1947
1948
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character,
   *                 an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Strict emptiness check: a loose "!$str" would also discard the string "0",
    // which is a perfectly valid one-character input.
    if (!isset($str[0])) {
      return array();
    }

    // strlen() counts bytes, so applied per character it yields the byte width
    return array_map('strlen', self::split($str));
  }
1968
1969
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte is inspected to find out how many bytes the sequence has,
   * then the payload bits of the following bytes are appended, six at a time.
   * A lead byte that matches none of the multi-byte patterns is returned as-is.
   *
   * @param   string $char The input character
   *
   * @return  int
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;
    $code = self::ord($char[0]);

    // 0xxxxxxx -> single byte, the value is the code point
    if (($code & 0x80) === 0) {
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx -> two-byte sequence
      $length = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx -> three-byte sequence
      $length = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx -> four-byte sequence
      $length = 4;
      $code &= ~0xf0;
    } else {
      // no known lead-byte pattern -> nothing to append below
      $length = 1;
    }

    // 10xxxxxx -> fold in the remaining bytes of the sequence
    for ($pos = 1; $pos < $length; $pos++) {
      $code = ($code << 6) + (self::ord($char[$pos]) & ~0x80);
    }

    return $code;
  }
2008
2009
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The character is converted with UTF8::ord() and the resulting number is
   * formatted by UTF8::int_to_hex() using the given prefix.
   *
   * @param    string $char The input character
   * @param    string $pfix Prefix handed to UTF8::int_to_hex(), "U+" by default.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
2021
2022
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from UTF8::split(); they are glued together with implode(),
   * so $end is placed between the chunks and not after the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
2035
2036
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, in this order: drop bytes that do not form a UTF-8 sequence, remove the
   * diamond question mark, remove invisible characters, then the optional
   * whitespace / MS Word / BOM treatments selected by the flags.
   * Each flag only takes effect when it is strictly true.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to strip the BOM via UTF8::removeBOM()
   * @param bool   $normalize_whitespace    set true, to run UTF8::normalize_whitespace()
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings,
    // hence the repetition below is capped at 100 per match

    // group 1 = run of well-formed sequences (kept), groups 2 + 3 = stray bytes (dropped)
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    $str = preg_replace($utf8Pattern, '$1', $str);
    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
2082
2083
  /**
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
   *
   * @param string $str
   *
   * @return string The cleaned string, '' for empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // clean() is asked to
    //  - remove all none UTF-8 symbols, the diamond question mark (�)
    //    and invisible characters (e.g. "\0")
    //  - remove the BOM
    //  - normalize whitespace chars (but keep non-breaking-spaces)
    //  - leave MS Word characters untouched
    return (string)self::clean($fixed, true, true, false, true);
  }
2110
2111
  /**
   * Accepts a string or a array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param    string|string[] $arg     A UTF-8 encoded string (it is split into characters first)
   *                                    or an array of such strings (used as given).
   * @param    bool            $u_style If True, every code point is additionally passed through
   *                                    UTF8::int_to_hex() (U+xxxx format),
   *                                    default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $toCodePoint = array(
        '\\voku\\helper\\UTF8',
        'ord',
    );
    $codePoints = array_map($toCodePoint, $chars);

    if (!$u_style) {
      return $codePoints;
    }

    $toHex = array(
        '\\voku\\helper\\UTF8',
        'int_to_hex',
    );

    return array_map($toHex, $codePoints);
  }
2148
2149
  /**
   * Returns count of characters used in a string.
   *
   * The string is split into single characters by UTF8::split() and the
   * occurrences are tallied with array_count_values().
   *
   * @param    string $str       The input string.
   * @param    bool   $cleanUtf8 Clean non UTF-8 chars from the string (forwarded to UTF8::split()).
   *
   * @return   array An associative array of Character as keys and
   *           their count as values.
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
2162
2163
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity, which is then
   * converted from "HTML-ENTITIES" to UTF-8 by mb_convert_encoding().
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2180
2181
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The input is returned unchanged when the string or the encoding is empty, when no
   * encoding could be detected, when $force is not true and the detected encoding already
   * equals the target, or when the final conversion yields an empty result.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
   *                         otherwise the string is only converted if the detected encoding
   *                         differs from the requested one
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if ($str === '' || $encoding === '') {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // nothing detected, or already in the target encoding and no conversion forced
    if (
        !$encodingDetected
        ||
        (
            $force !== true
            &&
            $encodingDetected === $encoding
        )
    ) {
      return $str;
    }

    self::checkForSupport();

    // target UTF-8: use the repairing converter
    $utf8Sources = array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1');
    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || in_array($encodingDetected, $utf8Sources, true)
        )
    ) {
      return self::to_utf8($str);
    }

    // target ISO-8859-1: handled by the WINDOWS-1252 converter
    $isoSources = array('ISO-8859-1', 'UTF-8');
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || in_array($encodingDetected, $isoSources, true)
        )
    ) {
      return self::to_win1252($str);
    }

    // everything else is left to mbstring
    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // fall back to the input if the conversion produced nothing usable
    return $strEncoded ? $strEncoded : $str;
  }
2255
2256
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read. The value is passed through
   *                                     filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   *                                     </p>
   * @param int|bool|null $flags         [optional] <p>
   *                                     Forwarded as second argument of the native function
   *                                     (e.g. FILE_USE_INCLUDE_PATH to search the include path).
   *                                     null is treated as "not set" (false).
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with
   *                                     stream_context_create. If none is given and $timeout is
   *                                     non-zero, a context with that "http" timeout is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts.
   *                                     null is treated as 0 (start of the file).
   *                                     </p>
   * @param int|null      $maxlen        [optional] <p>
   *                                     Maximum length of data read. The default is to read until end
   *                                     of file is reached. Only integer values are forwarded.
   *                                     </p>
   * @param int           $timeout       Timeout for the automatically created "http" context,
   *                                     0 disables the automatic context.
   *
   * @param boolean       $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used
   *                                     non default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - kept to preserve behaviour.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function declares these parameters as bool / int. Map "not given"
    // to the native defaults explicitly instead of relying on the implicit
    // null coercion, which newer PHP versions report as deprecated.
    if ($flags === null) {
      $flags = false;
    }
    if ($offset === null) {
      $offset = 0;
    }

    // $maxlen is only forwarded when it is a real integer, so that a missing
    // value never gets coerced into a length limit
    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // convert only if the detected encoding is not UTF-8 already, then clean up
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
2376
2377
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read and the content is handed to "UTF8::string_has_bom()".
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (also False when the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // A failed read yields false; do not pass a boolean on to the string check.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
2388
2389
  /**
   * Normalizes strings to the requested Unicode normalization form (NFC by default).
   *
   * Arrays and objects are walked recursively, so every contained string is filtered;
   * any value that is neither array, object nor string is returned untouched.
   *
   * For strings: "\r\n" and "\r" are turned into "\n", and only strings containing
   * bytes >= 0x80 are normalized. If the normalizer yields no usable result, the string
   * is passed through "UTF8::encode('UTF-8', ...)" instead.
   *
   * @param mixed  $var                The value to filter.
   * @param int    $normalization_form Form passed on to the "Normalizer" class.
   * @param string $leading_combining  Prefix that is put in front of a string which starts
   *                                   with a combining mark (pass an empty string to disable).
   *
   * @return mixed The filtered value, same structure as the input.
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    if (is_array($var)) {
      $filtered = array();
      foreach ($var as $key => $item) {
        $filtered[$key] = self::filter($item, $normalization_form, $leading_combining);
      }

      return $filtered;
    }

    if (is_object($var)) {
      // the iterable properties are rewritten on the object itself
      foreach ($var as $property => $item) {
        $var->{$property} = self::filter($item, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if (!is_string($var)) {
      return $var;
    }

    // Workaround https://bugs.php.net/65732
    if (strpos($var, "\r") !== false) {
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // without any byte >= 0x80 there is nothing to normalize
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker: the string was fine as it is
      $normalized = '-';
    } else {
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // no usable result from the normalizer -> fall back to the encoder
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars
    // for NFC-safe concatenations.
    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
2441
2442
  /**
   * Wrapper around the native "filter_input()" whose result is passed through "UTF8::filter()".
   *
   * The "$option" argument is only forwarded to the native function when the caller
   * actually supplied it (four arguments).
   *
   * @param int    $type   Input type, as accepted by "filter_input()".
   * @param string $var    Name of the variable to fetch.
   * @param int    $filter Filter id.
   * @param mixed  $option Options / flags for the filter.
   *
   * @return mixed The result of "filter_input()", run through "UTF8::filter()".
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $value = (func_num_args() < 4)
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($value);
  }
2462
2463
  /**
   * Wrapper around the native "filter_input_array()" whose result is passed through "UTF8::filter()".
   *
   * When only "$type" is given, the native function is called with that single argument;
   * otherwise "$definition" and "$add_empty" are forwarded as well.
   *
   * @param int   $type       Input type, as accepted by "filter_input_array()".
   * @param mixed $definition Filter definition.
   * @param bool  $add_empty  Forwarded to "filter_input_array()".
   *
   * @return mixed The result of "filter_input_array()", run through "UTF8::filter()".
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
2482
2483
  /**
   * Wrapper around the native "filter_var()" whose result is passed through "UTF8::filter()".
   *
   * The "$option" argument is only forwarded to the native function when the caller
   * actually supplied it (three arguments).
   *
   * @param mixed $var    The value to filter.
   * @param int   $filter Filter id.
   * @param mixed $option Options / flags for the filter.
   *
   * @return mixed The result of "filter_var()", run through "UTF8::filter()".
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    $value = (func_num_args() < 3)
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($value);
  }
2502
2503
  /**
   * Wrapper around the native "filter_var_array()" whose result is passed through "UTF8::filter()".
   *
   * When only "$data" is given, the native function is called with that single argument;
   * otherwise "$definition" and "$add_empty" are forwarded as well.
   *
   * @param array $data       The values to filter.
   * @param mixed $definition Filter definition.
   * @param bool  $add_empty  Forwarded to "filter_var_array()".
   *
   * @return mixed The result of "filter_var_array()", run through "UTF8::filter()".
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    $values = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
2522
2523
  /**
   * Tells whether a string is at most "$box_size" long, measured with "UTF8::strlen()".
   *
   * @param    string $str      The string to measure.
   * @param    int    $box_size The maximum allowed length.
   *
   * @return   bool true if the measured length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2535
2536
  /**
   * Repairs simple broken UTF-8 sequences via a plain search-and-replace.
   *
   * The search / replace lists are taken from the "$brokenUtf8ToUtf8" map once
   * and then kept for all later calls.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * @param string $str The input, it is cast to string first.
   *
   * @return string The repaired string, '' for empty input.
   */
  public static function fix_simple_utf8($str)
  {
    // per-process cache of the map's keys and values
    static $search = null;
    static $replace = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2563
2564
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * The string is run through "UTF8::utf8_decode()" + "UTF8::to_utf8()" again and again
   * until a pass no longer changes it. Arrays are processed element by element
   * (recursively), keys are kept.
   *
   * @param string|string[] $str You can use a string or an array of strings.
   *
   * @return mixed The fixed string, or an array of fixed values for array input.
   */
  public static function fix_utf8($str)
  {
    if (!is_array($str)) {
      $previous = '';

      // stop as soon as one more round leaves the value unchanged
      while ($previous !== $str) {
        $previous = $str;
        $str = self::to_utf8(self::utf8_decode($str));
      }

      return $str;
    }

    $fixed = array();
    foreach ($str as $key => $value) {
      $fixed[$key] = self::fix_utf8($value);
    }

    return $fixed;
  }
2592
2593
  /**
   * Get the text direction of a specific character.
   *
   * When "intlChar" support is flagged, "IntlChar::charDirection()" is asked first.
   * If that gives no decision (or the support is missing), the decimal code point
   * from "chr_to_decimal()" is looked up in the hard-coded tables below.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    // single code points that count as right-to-left (matched strictly, as integers)
    static $rtlCodePoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // inclusive [first, last] code point ranges that count as right-to-left
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // from "IntlChar"-Class
      if (in_array($intlDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // any other result: decide via the code point tables
    }

    $c = static::chr_to_decimal($char);

    // everything outside of 0x5be ... 0x10b7f is left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (in_array($c, $rtlCodePoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2711
2712
  /**
   * Loads a data file: requires "<this dir>/data/<$file>.php" and hands back its return value.
   *
   * @param string $file Name of the data file, without directory and extension.
   *
   * @return bool|string|array|int Whatever the required file returns, false if the file does not exist.
   */
  protected static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2729
2730
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted input: an optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal
   * digits (case-insensitive), e.g. "U+00f1", "\u00f1" or "00f1".
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int|false The code point, or false on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits are allowed: with a wider class like [a-z0-9] an input
    // such as "zzzz" would match and intval() would silently turn it into 0.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2747
2748
  /**
   * Shortcut for "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      The input string.
   * @param int    $flags    Flags for the decoding, null lets the target method choose.
   * @param string $encoding Encoding to use.
   *
   * @return string The result of "UTF8::html_entity_decode()".
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2763
2764
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * With "mb_encode_numericentity()" available, every code point from the start code
   * (0x00, or 0x80 when ASCII is kept) up to 0x10ffff is encoded. Without it, each
   * character from "UTF8::split()" is encoded via "UTF8::single_chr_html_encode()";
   * "$encoding" is not used on that path.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param  string $str            The Unicode string to be encoded as numbered entities.
   * @param  bool   $keepAsciiChars Keep ASCII chars.
   * @param  string $encoding       Encoding of "$str", normalized via "UTF8::normalizeEncoding()".
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      $encoding = self::normalizeEncoding($encoding);

      // convmap: array(first code point, last code point, offset, bit mask).
      // The range has to reach 0x10ffff (with a mask wide enough for 21 bits),
      // otherwise code points above 0xffff are left unencoded in the result.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff),
          $encoding
      );
    }

    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
2803
2804
  /**
2805
   * UTF-8 version of html_entity_decode()
2806
   *
2807
   * The reason we are not using html_entity_decode() by itself is because
2808
   * while it is not technically correct to leave out the semicolon
2809
   * at the end of an entity most browsers will still interpret the entity
2810
   * correctly. html_entity_decode() does not convert entities without
2811
   * semicolons, so we are left with our own little solution here. Bummer.
2812
   *
2813
   * Convert all HTML entities to their applicable characters
2814
   *
2815
   * INFO: opposite to UTF8::html_encode()
2816
   *
2817
   * @link http://php.net/manual/en/function.html-entity-decode.php
2818
   *
2819
   * @param string $str      <p>
2820
   *                         The input string.
2821
   *                         </p>
2822
   * @param int    $flags    [optional] <p>
2823
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2824
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2825
   *                         <table>
2826
   *                         Available <i>flags</i> constants
2827
   *                         <tr valign="top">
2828
   *                         <td>Constant Name</td>
2829
   *                         <td>Description</td>
2830
   *                         </tr>
2831
   *                         <tr valign="top">
2832
   *                         <td><b>ENT_COMPAT</b></td>
2833
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2834
   *                         </tr>
2835
   *                         <tr valign="top">
2836
   *                         <td><b>ENT_QUOTES</b></td>
2837
   *                         <td>Will convert both double and single quotes.</td>
2838
   *                         </tr>
2839
   *                         <tr valign="top">
2840
   *                         <td><b>ENT_NOQUOTES</b></td>
2841
   *                         <td>Will leave both double and single quotes unconverted.</td>
2842
   *                         </tr>
2843
   *                         <tr valign="top">
2844
   *                         <td><b>ENT_HTML401</b></td>
2845
   *                         <td>
2846
   *                         Handle code as HTML 4.01.
2847
   *                         </td>
2848
   *                         </tr>
2849
   *                         <tr valign="top">
2850
   *                         <td><b>ENT_XML1</b></td>
2851
   *                         <td>
2852
   *                         Handle code as XML 1.
2853
   *                         </td>
2854
   *                         </tr>
2855
   *                         <tr valign="top">
2856
   *                         <td><b>ENT_XHTML</b></td>
2857
   *                         <td>
2858
   *                         Handle code as XHTML.
2859
   *                         </td>
2860
   *                         </tr>
2861
   *                         <tr valign="top">
2862
   *                         <td><b>ENT_HTML5</b></td>
2863
   *                         <td>
2864
   *                         Handle code as HTML 5.
2865
   *                         </td>
2866
   *                         </tr>
2867
   *                         </table>
2868
   *                         </p>
2869
   * @param string $encoding [optional] <p>
2870
   *                         Encoding to use.
2871
   *                         </p>
2872
   *
2873
   * @return string the decoded string.
2874
   */
2875 17
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // shortcuts: empty input, or no "&" and therefore no entity at all
    if ($str === '') {
      return '';
    }

    if (strpos($str, '&') === false) {
      return $str;
    }

    self::checkForSupport();

    $encoding = self::normalizeEncoding($encoding);

    // default flags: HTML5 entity table when "Bootup::is_php('5.4')" reports true
    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Decodes one decimal entity, but hands quote characters back in their
    // encoded form, so that the "$flags" decide about them in the native call below.
    $decodeDecimalEntity = function ($matches) {
      $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // repeat until a full pass changes nothing anymore (handles multiple encoded input)
    do {
      $previous = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", $decodeDecimalEntity, $str);

      // decode numeric & UTF16 two byte entities
      // (numeric entities without a trailing semicolon get one appended first)
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2923
2924
  /**
2925
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2926
   *
2927
   * @link http://php.net/manual/en/function.htmlentities.php
2928
   *
2929
   * @param string $str           <p>
2930
   *                              The input string.
2931
   *                              </p>
2932
   * @param int    $flags         [optional] <p>
2933
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2934
   *                              invalid code unit sequences and the used document type. The default is
2935
   *                              ENT_COMPAT | ENT_HTML401.
2936
   *                              <table>
2937
   *                              Available <i>flags</i> constants
2938
   *                              <tr valign="top">
2939
   *                              <td>Constant Name</td>
2940
   *                              <td>Description</td>
2941
   *                              </tr>
2942
   *                              <tr valign="top">
2943
   *                              <td><b>ENT_COMPAT</b></td>
2944
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2945
   *                              </tr>
2946
   *                              <tr valign="top">
2947
   *                              <td><b>ENT_QUOTES</b></td>
2948
   *                              <td>Will convert both double and single quotes.</td>
2949
   *                              </tr>
2950
   *                              <tr valign="top">
2951
   *                              <td><b>ENT_NOQUOTES</b></td>
2952
   *                              <td>Will leave both double and single quotes unconverted.</td>
2953
   *                              </tr>
2954
   *                              <tr valign="top">
2955
   *                              <td><b>ENT_IGNORE</b></td>
2956
   *                              <td>
2957
   *                              Silently discard invalid code unit sequences instead of returning
2958
   *                              an empty string. Using this flag is discouraged as it
2959
   *                              may have security implications.
2960
   *                              </td>
2961
   *                              </tr>
2962
   *                              <tr valign="top">
2963
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2964
   *                              <td>
2965
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2966
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2967
   *                              </td>
2968
   *                              </tr>
2969
   *                              <tr valign="top">
2970
   *                              <td><b>ENT_DISALLOWED</b></td>
2971
   *                              <td>
2972
   *                              Replace invalid code points for the given document type with a
2973
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2974
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2975
   *                              instance, to ensure the well-formedness of XML documents with
2976
   *                              embedded external content.
2977
   *                              </td>
2978
   *                              </tr>
2979
   *                              <tr valign="top">
2980
   *                              <td><b>ENT_HTML401</b></td>
2981
   *                              <td>
2982
   *                              Handle code as HTML 4.01.
2983
   *                              </td>
2984
   *                              </tr>
2985
   *                              <tr valign="top">
2986
   *                              <td><b>ENT_XML1</b></td>
2987
   *                              <td>
2988
   *                              Handle code as XML 1.
2989
   *                              </td>
2990
   *                              </tr>
2991
   *                              <tr valign="top">
2992
   *                              <td><b>ENT_XHTML</b></td>
2993
   *                              <td>
2994
   *                              Handle code as XHTML.
2995
   *                              </td>
2996
   *                              </tr>
2997
   *                              <tr valign="top">
2998
   *                              <td><b>ENT_HTML5</b></td>
2999
   *                              <td>
3000
   *                              Handle code as HTML 5.
3001
   *                              </td>
3002
   *                              </tr>
3003
   *                              </table>
3004
   *                              </p>
3005
   * @param string $encoding      [optional] <p>
3006
   *                              Like <b>htmlspecialchars</b>,
3007
   *                              <b>htmlentities</b> takes an optional third argument
3008
   *                              <i>encoding</i> which defines encoding used in
3009
   *                              conversion.
3010
   *                              Although this argument is technically optional, you are highly
3011
   *                              encouraged to specify the correct value for your code.
3012
   *                              </p>
3013
   * @param bool   $double_encode [optional] <p>
3014
   *                              When <i>double_encode</i> is turned off PHP will not
3015
   *                              encode existing html entities. The default is to convert everything.
3016
   *                              </p>
3017
   *
3018
   *
3019
   * @return string the encoded string.
3020
   * </p>
3021
   * <p>
3022
   * If the input <i>string</i> contains an invalid code unit
3023
   * sequence within the given <i>encoding</i> an empty string
3024
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3025
   * <b>ENT_SUBSTITUTE</b> flags are set.
3026
   */
3027 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $encoding = self::normalizeEncoding($encoding);

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct character of three or more bytes that is still in the
    // string and map it to its numbered entity.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($str, $position);
      if (isset($replacements[$char])) {
        // already mapped
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
3053
3054
  /**
3055
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3056
   *
3057
   * INFO: Take a look at "UTF8::htmlentities()"
3058
   *
3059
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3060
   *
3061
   * @param string $str           <p>
3062
   *                              The string being converted.
3063
   *                              </p>
3064
   * @param int    $flags         [optional] <p>
3065
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3066
   *                              invalid code unit sequences and the used document type. The default is
3067
   *                              ENT_COMPAT | ENT_HTML401.
3068
   *                              <table>
3069
   *                              Available <i>flags</i> constants
3070
   *                              <tr valign="top">
3071
   *                              <td>Constant Name</td>
3072
   *                              <td>Description</td>
3073
   *                              </tr>
3074
   *                              <tr valign="top">
3075
   *                              <td><b>ENT_COMPAT</b></td>
3076
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3077
   *                              </tr>
3078
   *                              <tr valign="top">
3079
   *                              <td><b>ENT_QUOTES</b></td>
3080
   *                              <td>Will convert both double and single quotes.</td>
3081
   *                              </tr>
3082
   *                              <tr valign="top">
3083
   *                              <td><b>ENT_NOQUOTES</b></td>
3084
   *                              <td>Will leave both double and single quotes unconverted.</td>
3085
   *                              </tr>
3086
   *                              <tr valign="top">
3087
   *                              <td><b>ENT_IGNORE</b></td>
3088
   *                              <td>
3089
   *                              Silently discard invalid code unit sequences instead of returning
3090
   *                              an empty string. Using this flag is discouraged as it
3091
   *                              may have security implications.
3092
   *                              </td>
3093
   *                              </tr>
3094
   *                              <tr valign="top">
3095
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3096
   *                              <td>
3097
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3098
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
3099
   *                              </td>
3100
   *                              </tr>
3101
   *                              <tr valign="top">
3102
   *                              <td><b>ENT_DISALLOWED</b></td>
3103
   *                              <td>
3104
   *                              Replace invalid code points for the given document type with a
3105
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3106
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3107
   *                              instance, to ensure the well-formedness of XML documents with
3108
   *                              embedded external content.
3109
   *                              </td>
3110
   *                              </tr>
3111
   *                              <tr valign="top">
3112
   *                              <td><b>ENT_HTML401</b></td>
3113
   *                              <td>
3114
   *                              Handle code as HTML 4.01.
3115
   *                              </td>
3116
   *                              </tr>
3117
   *                              <tr valign="top">
3118
   *                              <td><b>ENT_XML1</b></td>
3119
   *                              <td>
3120
   *                              Handle code as XML 1.
3121
   *                              </td>
3122
   *                              </tr>
3123
   *                              <tr valign="top">
3124
   *                              <td><b>ENT_XHTML</b></td>
3125
   *                              <td>
3126
   *                              Handle code as XHTML.
3127
   *                              </td>
3128
   *                              </tr>
3129
   *                              <tr valign="top">
3130
   *                              <td><b>ENT_HTML5</b></td>
3131
   *                              <td>
3132
   *                              Handle code as HTML 5.
3133
   *                              </td>
3134
   *                              </tr>
3135
   *                              </table>
3136
   *                              </p>
3137
   * @param string $encoding      [optional] <p>
3138
   *                              Defines encoding used in conversion.
3139
   *                              </p>
3140
   *                              <p>
3141
   *                              For the purposes of this function, the encodings
3142
   *                              ISO-8859-1, ISO-8859-15,
3143
   *                              UTF-8, cp866,
3144
   *                              cp1251, cp1252, and
3145
   *                              KOI8-R are effectively equivalent, provided the
3146
   *                              <i>string</i> itself is valid for the encoding, as
3147
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3148
   *                              the same positions in all of these encodings.
3149
   *                              </p>
3150
   * @param bool   $double_encode [optional] <p>
3151
   *                              When <i>double_encode</i> is turned off PHP will not
3152
   *                              encode existing html entities, the default is to convert everything.
3153
   *                              </p>
3154
   *
3155
   * @return string The converted string.
3156
   * </p>
3157
   * <p>
3158
   * If the input <i>string</i> contains an invalid code unit
3159
   * sequence within the given <i>encoding</i> an empty string
3160
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3161
   * <b>ENT_SUBSTITUTE</b> flags are set.
3162
   */
3163 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The encoding name is passed through self::normalizeEncoding() first,
    // then everything is delegated to PHP's native htmlspecialchars().
    return htmlspecialchars($str, $flags, self::normalizeEncoding($encoding), $double_encode);
  }
3169
3170
  /**
3171
   * checks whether iconv is available on the server
3172
   *
3173
   * @return   bool True if available, False otherwise
3174
   */
3175 1
  public static function iconv_loaded()
  {
    // extension_loaded() already returns a boolean, so it is returned as-is.
    $isLoaded = extension_loaded('iconv');

    return $isLoaded;
  }
3179
3180
  /**
3181
   * Converts Integer to hexadecimal U+xxxx code point representation.
3182
   *
3183
   * INFO: opposite to UTF8::hex_to_int()
3184
   *
3185
   * @param    int    $int The integer to be converted to hexadecimal code point.
3186
   * @param    string $pfix
3187
   *
3188
   * @return   string The code point, or empty string on failure.
3189
   */
3190 2
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only values made up entirely of decimal digits are accepted
    // (this rejects negatives, floats and non-numeric input).
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad the hex digits with zeros to a minimum width of four, e.g. 65 -> "0041".
    $hexDigits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $hexDigits;
  }
3202
3203
  /**
3204
   * checks whether intl-char is available on the server
3205
   *
3206
   * @return   bool True if available, False otherwise
3207
   */
3208 1
  public static function intlChar_loaded()
  {
    // Both conditions must hold: the runtime reports PHP >= 7.0 (per Bootup::is_php())
    // and the IntlChar class can be found. "&&" is used instead of the low-precedence
    // "and" operator so the expression reads (and binds) as one boolean condition.
    return Bootup::is_php('7.0') === true && class_exists('IntlChar');
  }
3212
3213
  /**
3214
   * checks whether intl is available on the server
3215
   *
3216
   * @return   bool True if available, False otherwise
3217
   */
3218 3
  public static function intl_loaded()
  {
    // extension_loaded() already returns a boolean, so it is returned as-is.
    $isLoaded = extension_loaded('intl');

    return $isLoaded;
  }
3222
3223
  /**
3224
   * alias for "UTF8::is_ascii()"
3225
   *
3226
   * @see UTF8::is_ascii()
3227
   *
3228
   * @param string $str
3229
   *
3230
   * @return boolean
3231
   */
3232 2
  public static function isAscii($str)
  {
    // camelCase alias: forwards unchanged to self::is_ascii().
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
3236
3237
  /**
3238
   * alias for "UTF8::is_base64()"
3239
   *
3240
   * @see UTF8::is_base64()
3241
   *
3242
   * @param string $str
3243
   *
3244
   * @return bool
3245
   */
3246 1
  public static function isBase64($str)
  {
    // camelCase alias: forwards unchanged to self::is_base64().
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
3250
3251
  /**
3252
   * alias for "UTF8::is_binary()"
3253
   *
3254
   * @see UTF8::is_binary()
3255
   *
3256
   * @param string $str
3257
   *
3258
   * @return bool
3259
   */
3260
  public static function isBinary($str)
  {
    // camelCase alias: forwards unchanged to self::is_binary().
    $isBinary = self::is_binary($str);

    return $isBinary;
  }
3264
3265
  /**
3266
   * alias for "UTF8::is_bom()"
3267
   *
3268
   * @see UTF8::is_bom()
3269
   *
3270
   * @param string $utf8_chr
3271
   *
3272
   * @return boolean
3273
   */
3274
  public static function isBom($utf8_chr)
  {
    // camelCase alias: forwards unchanged to self::is_bom().
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
3278
3279
  /**
3280
   * alias for "UTF8::is_html()"
3281
   *
3282
   * @see UTF8::is_html()
3283
   *
3284
   * @param string $str
3285
   *
3286
   * @return boolean
3287
   */
3288 1
  public static function isHtml($str)
  {
    // camelCase alias: forwards unchanged to self::is_html().
    $isHtml = self::is_html($str);

    return $isHtml;
  }
3292
3293
  /**
3294
   * alias for "UTF8::is_json()"
3295
   *
3296
   * @see UTF8::is_json()
3297
   *
3298
   * @param string $str
3299
   *
3300
   * @return bool
3301
   */
3302
  public static function isJson($str)
  {
    // camelCase alias: forwards unchanged to self::is_json().
    $isJson = self::is_json($str);

    return $isJson;
  }
3306
3307
  /**
3308
   * alias for "UTF8::is_utf16()"
3309
   *
3310
   * @see UTF8::is_utf16()
3311
   *
3312
   * @param string $str
3313
   *
3314
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3315
   */
3316 1
  public static function isUtf16($str)
  {
    // camelCase alias: forwards unchanged to self::is_utf16()
    // (false, 1 for UTF-16LE or 2 for UTF-16BE).
    $detected = self::is_utf16($str);

    return $detected;
  }
3320
3321
  /**
3322
   * alias for "UTF8::is_utf32()"
3323
   *
3324
   * @see UTF8::is_utf32()
3325
   *
3326
   * @param string $str
3327
   *
3328
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3329
   */
3330 1
  public static function isUtf32($str)
  {
    // camelCase alias: forwards unchanged to self::is_utf32()
    // (false, 1 for UTF-32LE or 2 for UTF-32BE).
    $detected = self::is_utf32($str);

    return $detected;
  }
3334
3335
  /**
3336
   * alias for "UTF8::is_utf8()"
3337
   *
3338
   * @see UTF8::is_utf8()
3339
   *
3340
   * @param string $str
3341
   * @param  bool  $strict
3342
   *
3343
   * @return bool
3344
   */
3345 16
  public static function isUtf8($str, $strict = false)
  {
    // camelCase alias: forwards both arguments unchanged to self::is_utf8().
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
3349
3350
  /**
3351
   * Checks if a string is 7 bit ASCII.
3352
   *
3353
   * @param    string $str The string to check.
3354
   *
3355
   * @return   bool <strong>true</strong> if it is ASCII<br />
3356
   *                <strong>false</strong> otherwise
3357
   */
3358 14
  public static function is_ascii($str)
  {
    // A single byte in the 0x80-0xFF range is enough to rule out 7-bit ASCII.
    $hasHighByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighByte;
  }
3362
3363
  /**
3364
   * Returns true if the string is base64 encoded, false otherwise.
3365
   *
3366
   * @param string $str
3367
   *
3368
   * @return bool Whether or not $str is base64 encoded
3369
   */
3370 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    // The empty string is never reported as base64.
    if ($str === '') {
      return false;
    }

    // Decode in strict mode, encode the result again and require an exact round trip.
    $decoded = base64_decode($str, true);

    return base64_encode($decoded) === $str;
  }
3384
3385
  /**
3386
   * Check if the input is binary (heuristic; this is admittedly a hack).
3387
   *
3388
   * @param mixed $input
3389
   *
3390
   * @return bool
3391
   */
3392 16
  public static function is_binary($input)
  {
    // Work on a string, like the other checks in this class do.
    $input = (string)$input;

    // Heuristic 1: a non-empty string made up only of the characters "0" and "1"
    // is treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Heuristic 2: any NUL byte marks the input as binary.
    //
    // A former third heuristic compared substr_count($input, '^ -~') / strlen($input)
    // against 0.3. Because that needle is a literal 4-byte string, the ratio can never
    // exceed 0.25, so the condition was unreachable; it has been removed without
    // changing any result.
    return substr_count($input, "\x00") > 0;
  }
3409
3410
  /**
3411
   * Check if the file is binary.
3412
   *
3413
   * @param string $file
3414
   *
3415
   * @return boolean
3416
   */
3417
  public static function is_binary_file($file)
  {
    // Fallback when the file cannot be opened or read: classify an empty string.
    $block = '';

    try {
      // fopen()/fread() report failure by returning false (with a warning), not by
      // throwing, so the results are checked explicitly. The try/catch is kept for
      // setups whose error handler turns warnings into exceptions.
      $fp = fopen($file, 'rb');

      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $chunk = fread($fp, 512);
        fclose($fp);

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      $block = '';
    }

    return self::is_binary($block);
  }
3429
3430
  /**
3431
   * Checks if the given string is equal to any "Byte Order Mark".
3432
   *
3433
   * WARNING: Use "UTF8::string_has_bom()" if you want to check whether a string contains a BOM.
3434
   *
3435
   * @param    string $str The input string.
3436
   *
3437
   * @return   bool True if $str is a Byte Order Mark, False otherwise.
3438
   */
3439
  public static function is_bom($str)
  {
    // The candidate BOM strings are the keys of self::$bom (the values are not used
    // here). The comparison is strict, so $str must be identical to one of the keys.
    $knownBoms = array_keys(self::$bom);

    return in_array($str, $knownBoms, true);
  }
3449
3450
  /**
3451
   * Check if the string contains any html-tags <lall>.
3452
   *
3453
   * @param string $str
3454
   *
3455
   * @return boolean
3456
   */
3457 1
  public static function is_html($str)
  {
    $str = (string)$str;

    // Nothing to inspect in an empty string.
    if ($str === '') {
      return false;
    }

    // Look for the first thing that resembles an opening, closing or self-closing
    // tag, optionally carrying attributes.
    $tagMatch = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str, $tagMatch);

    return count($tagMatch) != 0;
  }
3476
3477
  /**
3478
   * Try to check if "$str" is a JSON string.
3479
   *
3480
   * @param string $str
3481
   *
3482
   * @return bool
3483
   */
3484 1
  public static function is_json($str)
  {
    $str = (string)$str;

    // An empty string is never JSON.
    if ($str === '') {
      return false;
    }

    // Only input that decodes to an object, without a decoder error, counts as JSON here.
    $decoded = self::json_decode($str);

    return is_object($decoded) && json_last_error() === JSON_ERROR_NONE;
  }
3502
3503
  /**
3504
   * Check if the string is UTF-16.
3505
   *
3506
   * @param string $str
3507
   *
3508
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3509
   */
3510 4 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that self::is_binary() flags is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // One score per byte order, computed identically for both candidates.
    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array_keys($scores) as $encoding) {
      // Interpret the input as the candidate encoding and convert it to UTF-8.
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 must reproduce the first conversion.
      $backToCandidate = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // Score: count the keys of count_chars($roundTrip) that are strictly found
      // among the values of count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    // A tie (including 0:0) means no decision can be made.
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
3559
3560
  /**
3561
   * Check if the string is UTF-32.
3562
   *
3563
   * @param string $str
3564
   *
3565
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3566
   */
3567 3 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that self::is_binary() flags is examined any further.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // One score per byte order, computed identically for both candidates.
    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array_keys($scores) as $encoding) {
      // Interpret the input as the candidate encoding and convert it to UTF-8.
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 must reproduce the first conversion.
      $backToCandidate = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $encoding);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // Score: count the keys of count_chars($roundTrip) that are strictly found
      // among the values of count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    // A tie (including 0:0) means no decision can be made.
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
3616
3617
  /**
3618
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3619
   *
3620
   * @see    http://hsivonen.iki.fi/php-utf8/
3621
   *
3622
   * @param  string $str    The string to be checked.
3623
   * @param  bool   $strict Check also if the string is not UTF-16 or UTF-32.
3624
   *
3625
   * @return bool
3626
   */
3627 43
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is considered valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: reject input that is detected as UTF-16 or UTF-32.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this branch runs when pcre_utf8_support() is NOT true, yet it relies
    // on the /u modifier - the condition looks inverted. Left unchanged; confirm against
    // pcre_utf8_support() before touching it.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched when the /u modifier is used,
      // the string is valid UTF-8. If the UTF-8 is somehow invalid, nothing at all will
      // match, even if the string contains some valid sequences.
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the lead octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // Lead octet of a 2-octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // Lead octet of a 3-octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // Lead octet of a 4-octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // Lead octet of a 5-octet sequence. Always illegal (either not the shortest
          // form or outside 0-0x10FFFF); instead of resynchronizing, the sequence is
          // consumed and rejected by the checks at its end.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // Lead octet of a 6-octet sequence, handled like the 5-octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal lead octet.
          return false;
        }

        continue;
      }

      // Inside a sequence: the octet must be a continuation octet (10xxxxxx).
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      // Merge the 6 payload bits into their position within the code point.
      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // Sequence complete: $mUcs4 holds the code point. Reject illegal encodings.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // If continuation octets are still pending, the string ends in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3") and is therefore not valid UTF-8.
    return $mState === 0;
  }
3761
3762
  /**
3763
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3764
   * Decodes a JSON string
3765
   *
3766
   * @link http://php.net/manual/en/function.json-decode.php
3767
   *
3768
   * @param string $json    <p>
3769
   *                        The <i>json</i> string being decoded.
3770
   *                        </p>
3771
   *                        <p>
3772
   *                        This function only works with UTF-8 encoded strings.
3773
   *                        </p>
3774
   *                        <p>PHP implements a superset of
3775
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3776
   *                        only supports these values when they are nested inside an array or an object.
3777
   *                        </p>
3778
   * @param bool   $assoc   [optional] <p>
3779
   *                        When <b>TRUE</b>, returned objects will be converted into
3780
   *                        associative arrays.
3781
   *                        </p>
3782
   * @param int    $depth   [optional] <p>
3783
   *                        User specified recursion depth.
3784
   *                        </p>
3785
   * @param int    $options [optional] <p>
3786
   *                        Bitmask of JSON decode options. Currently only
3787
   *                        <b>JSON_BIGINT_AS_STRING</b>
3788
   *                        is supported (default is to cast large integers as floats)
3789
   *                        </p>
3790
   *
3791
   * @return mixed the value encoded in <i>json</i> in appropriate
3792
   * PHP type. Values true, false and
3793
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3794
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3795
   * <i>json</i> cannot be decoded or if the encoded
3796
   * data is deeper than the recursion limit.
3797
   */
3798 2
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input goes through self::filter() before it is decoded.
    $json = self::filter($json);

    // $options is only forwarded when Bootup::is_php('5.4') reports true.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($json, $assoc, $depth);
    }

    return json_decode($json, $assoc, $depth, $options);
  }
3810
3811
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value.
   *
   * INFO: the value is passed through "UTF8::filter()" before it is given to the native "json_encode()".
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource. All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only forwarded to the native function when "Bootup::is_php('5.5')" is truthy.
   *                       </p>
   *
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // the three-argument form is used only when the version check succeeds
    return Bootup::is_php('5.5')
        ? json_encode($filtered, $options, $depth)
        : json_encode($filtered, $options);
  }
3859
3860
  /**
   * Makes string's first char lowercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string, an empty string for empty input
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // nothing to do for an empty string, and it avoids calling substr() on it
    if (!isset($str[0])) {
      return '';
    }

    // substr() can hand back "false" (flagged by static analysis), so both parts
    // are cast to string before they are used as strings
    $firstChar = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $rest;
  }
3871
3872
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param  string $str   The string to be trimmed
   * @param  string $chars Optional characters to be stripped; if omitted or empty, all leading
   *                       characters matching the Unicode classes "\pZ" and "\pC" are stripped.
   *                       The single character "0" is a valid mask.
   *
   * @return string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP, but it is a perfectly valid character mask,
    // so it must not fall into the "no mask given" branch
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    // INF was already handled above, so the mask is always converted via rxClass() here
    $chars = self::rxClass($chars);

    return preg_replace("/^{$chars}+/u", '', $str);
  }
3897
3898
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point, or an empty string
   *                  if no code points could be extracted (e.g. empty input).
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    // assumes UTF8::codepoints() yields an array of integers -- TODO confirm
    $codePoints = self::codepoints($arg);

    // the native max() must not be called with an empty array (warning / ValueError)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3913
3914
  /**
   * Determines the largest byte length among the characters of the given string,
   * based on the list returned by "UTF8::chr_size_list()".
   *
   * @param  string $str The original Unicode string.
   *
   * @return int Max byte length of the given chars, 0 if the size list is empty.
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    // guard clause: no entries means there is nothing to compare
    if (count($sizes) < 1) {
      return 0;
    }

    return (int)max($sizes);
  }
3931
3932
  /**
   * Checks whether the "mbstring" extension is loaded.
   *
   * Side effect: if the extension is available, the internal mbstring encoding is set to "UTF-8".
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3947
3948
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param  mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return string The character with the lowest code point, or an empty string
   *                if no code points could be extracted (e.g. empty input).
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    // assumes UTF8::codepoints() yields an array of integers -- TODO confirm
    $codePoints = self::codepoints($arg);

    // the native min() must not be called with an empty array (warning / ValueError)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3963
3964
  /**
   * Alias for "UTF8::normalize_encoding()", the argument is forwarded unchanged.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   *
   * @return string
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3977
3978
  /**
   * Normalize the encoding-"name" input.
   *
   * The result of the lookup is cached per input value (static cache) for later calls.
   *
   * @param  string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string|false e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.,<br />
   *                      <b>FALSE</b> if the given encoding is empty.
   */
  public static function normalize_encoding($encoding)
  {
    static $staticNormalizeEncodingCache = array();

    if (!$encoding) {
      return false;
    }

    // fast path for the most common value
    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    // cheap cache lookup first; values found in the list below are returned as-is
    // and never written to the cache, so the order does not change any result
    if (isset($staticNormalizeEncodingCache[$encoding])) {
      return $staticNormalizeEncodingCache[$encoding];
    }

    if (in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    $encodingOrig = $encoding;
    $encoding = strtoupper($encoding);

    // lookup key: the upper-cased name without anything that is not a letter, digit or whitespace
    $encodingUpperHelper = preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // unknown names are kept in their upper-cased form
    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    $staticNormalizeEncodingCache[$encodingOrig] = $encoding;

    return $encoding;
  }
4034
4035
  /**
   * Replaces every key of the "self::$utf8MSWord" table found in the string
   * with the corresponding value of that table.
   *
   * The search / replace lists are built once and kept in a static variable.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    static $replacementMap = null;

    if ($replacementMap === null) {
      $replacementMap = array(
          'search'  => array_keys(self::$utf8MSWord),
          'replace' => array_values(self::$utf8MSWord),
      );
    }

    return str_replace($replacementMap['search'], $replacementMap['replace'], $str);
  }
4054
4055
  /**
   * Normalize the whitespace.
   *
   * Every value of "self::$whitespaceTable" is replaced by a plain space; unless requested
   * otherwise, the values of "self::$bidiUniCodeControlsTable" are removed first.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true (strict boolean), to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // only a strict "true" keeps the NBSP; the cache key is derived from that same decision,
    // so a truthy non-boolean (e.g. 1) can no longer fill the cache slot that "true" uses
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($whitespaces[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      // built lazily on first use
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
4093
4094
  /**
   * Format a number with grouped thousands.
   *
   * INFO: the native "number_format()" only supports multi-byte separators as of PHP 5.4,
   *       for older versions the separators are inserted afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @deprecated
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // the workaround is needed as soon as ONE separator is longer than a single byte,
    // and only when the version check for 5.4 does not succeed
    $hasMultiByteSeparator = isset($thousands_sep[1]) || isset($dec_point[1]);

    if (
        $hasMultiByteSeparator
        &&
        Bootup::is_php('5.4') !== true
    ) {
      // strtr() replaces both placeholders in a single pass, so a separator that
      // contains "." or "," itself is not replaced a second time
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
4131
4132
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Only the first (up to) four bytes of the input are looked at; the continuation bytes
   * are not validated by this method.
   *
   * @param  string $chr The character of which to calculate code point.
   *
   * @return int Unicode code point of the given character,<br />
   *         0 for empty input (the string "0" is not treated as empty).
   */
  public static function ord($chr)
  {
    if (!$chr && $chr !== '0') {
      return 0;
    }

    // init
    self::checkForSupport();

    // prefer the intl implementation, but fall through if it yields nothing usable
    if (self::$support['intlChar'] === true) {
      $intlResult = \IntlChar::ord($chr);
      if ($intlResult) {
        return $intlResult;
      }
    }

    // 1-based array of the unsigned byte values
    $bytes = unpack('C*', substr($chr, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // four byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18)
             + (($bytes[2] - 0x80) << 12)
             + (($bytes[3] - 0x80) << 6)
             + ($bytes[4] - 0x80);
    }

    // three byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12)
             + (($bytes[2] - 0x80) << 6)
             + ($bytes[3] - 0x80);
    }

    // two byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + ($bytes[2] - 0x80);
    }

    // single byte, the value is returned as it is
    return $lead;
  }
4175
4176
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never sets variables in the current scope,
   *          the result is only available via the second parameter.
   *
   * INFO: the input is passed through "UTF8::clean()" before it is parsed via "mb_parse_str()".
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string.
   *                        </p>
   * @param array  $result  <p>
   *                        The result will be returned into this reference parameter.
   *                        </p>
   *
   * @return bool will return false if php can't parse the string or if $result is empty afterwards
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    $cleaned = self::clean($str);
    $parsed = \mb_parse_str($cleaned, $result);

    return $parsed !== false && !empty($result);
  }
4207
4208
  /**
   * Checks if the "/u" pattern modifier (Unicode support in PCRE) is usable,
   * by matching an empty pattern that carries this modifier.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // errors of the probe are silenced on purpose, only the result matters
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
4218
4219
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved in this order: decimal digits are used as code point,
   * hexadecimal digits are converted via "UTF8::hex_to_int()", everything else via "UTF8::ord()".
   *
   * @param  mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param  mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return array The characters of the range, an empty array if a boundary is empty or resolves to 0.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $bounds = array();

    // both boundaries are resolved by the same rules: first the start, then the end
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
4265
4266
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of "self::$bom" (BOM string => byte length) is checked one after another
   * against the start of the string.
   *
   * @param string $str
   *
   * @return string
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $marker => $markerByteLength) {
      // only a marker at the very beginning of the string is relevant
      if (strpos($str, $marker) !== 0) {
        continue;
      }

      $str = substr($str, $markerByteLength);
    }

    return $str;
  }
4283
4284
  /**
   * Alias for "UTF8::remove_bom()", the argument is forwarded unchanged.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4297
4298
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Consecutive repetitions of each search string are collapsed into a single occurrence.
   * If $what is neither a string nor an array, the input is returned unchanged.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (!is_array($what)) {
      return $str;
    }

    foreach ($what as $item) {
      // the captured group is used as replacement instead of the raw search string:
      // a search string like "$2" or "\1" would otherwise be read as a back-reference
      $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
    }

    return $str;
  }
4320
4321
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the url-encoded form (e.g. "%0B" / "%0b") of the characters.
   * @param  string $replacement
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoding may use upper-case or lower-case hex digits,
      // so these two patterns have to be case-insensitive
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is replaced anymore, because a removal can create a new match
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4354
4355
  /**
   * Replace the diamond question mark (�) with the replacement.
   *
   * @param string $str
   * @param string $unknown The replacement, "?" by default.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the character is listed as escaped byte sequence and as literal
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );

    $replace = array(
        $unknown,
        $unknown,
    );

    return str_replace($search, $replace, $str);
  }
4377
4378
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; if omitted or empty, all trailing
   *                         characters matching the Unicode classes "\pZ" and "\pC" are stripped.
   *                         The single character "0" is a valid mask.
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP, but it is a perfectly valid character mask,
    // so it must not fall into the "no mask given" branch
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    // INF was already handled above, so the mask is always converted via rxClass() here
    $chars = self::rxClass($chars);

    return preg_replace("/{$chars}+$/u", '', $str);
  }
4403
4404
  /**
   * Builds a regex fragment that matches any single character of the given string.
   *
   * Characters of up to two bytes are quoted via "preg_quote()" and collected in a character class,
   * a "-" is moved to the front of that class. Longer elements are either appended to the class
   * (if "UTF8::strlen()" reports one character) or added as alternative of a non-capturing group.
   * The result is cached per "$s . $class" combination.
   *
   * @param string $s     The characters to match.
   * @param string $class Initial content of the character class.
   *
   * @return string
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    $charClass = $class;
    $alternatives = array();

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a leading "-" is a literal inside a character class
        $charClass = '-' . $charClass;
        continue;
      }

      if (!isset($char[2])) {
        $charClass .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        $charClass .= $char;
        continue;
      }

      $alternatives[] = $char;
    }

    if ($charClass) {
      $charClass = '[' . $charClass . ']';
    }

    if (count($alternatives) === 0) {
      $return = $charClass;
    } else {
      // the character class is always the first alternative, even if it is empty
      $return = '(?:' . implode('|', array_merge(array($charClass), $alternatives)) . ')';
    }

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
4451
4452
  /**
   * Echo the values of "self::$support", each followed by a newline and "<br>", e.g. for debugging.
   */
  public static function showSupport()
  {
    $output = '';

    foreach (self::$support as $supportValue) {
      $output .= $supportValue . "\n<br>";
    }

    echo $output;
  }
4461
4462
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $char           The Unicode character to be encoded as numbered entity.
   * @param    bool   $keepAsciiChars Keep ASCII chars.
   *
   * @return   string The HTML numbered entity, an empty string for empty input.
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false)
  {
    // "0" is falsy in PHP, but it is a regular character that has to be encoded
    if (!$char && $char !== '0') {
      return '';
    }

    if (
        $keepAsciiChars === true
        &&
        self::isAscii($char) === true
    ) {
      return $char;
    }

    return '&#' . self::ord($char) . ';';
  }
4486
4487
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the characters are collected via regex, otherwise a byte-wise
   * fallback is used. In the fallback, bytes that do not form a complete sequence are skipped
   * and $cleanUtf8 is not applied.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: the lead byte decides how many continuation bytes are expected

      $byteCount = strlen($str);
      $i = 0;

      while ($i < $byteCount) {
        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {
          // single byte
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$i + 1])) {
          // two byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];

            $i++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$i + 2])) {
          // three byte sequence
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$i + 3])) {
          // four byte sequence
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80") && (($str[$i + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }
        }

        $i++;
      }
    }

    // group the single characters into chunks of the requested length
    if ($length > 1) {
      $chunks = array_chunk($chars, $length);

      $chars = array_map('implode', $chunks);
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4566
4567
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (binary strings only), ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed detection order and finally an "iconv()" round trip
   * for every entry of "self::$iconvEncoding".
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each detection runs only once, the result is compared afterwards
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', 'ISO-8859-5',
        'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', 'ISO-8859-10',
        'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
        'WINDOWS-1251', 'WINDOWS-1252', 'WINDOWS-1254',
        'ISO-2022-JP', 'JIS', 'EUC-JP',
    );

    self::checkForSupport();
    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // an encoding is accepted if converting the string from that encoding
    // into the very same encoding leaves the string unchanged

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      // INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@iconv($encodingTmp, $encodingTmp, $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4644
4645
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search value is quoted and turned into a case-insensitive unicode
   * regular expression ("/ui"); an empty search value is turned into a pattern
   * that can never match, so it leaves the subject untouched.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), handed over to "preg_replace()" as they are.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed A string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();

    // build the patterns in a fresh array instead of rewriting $search by reference,
    // so no dangling reference is left behind after the loop
    foreach ((array)$search as $needle) {
      $needle .= '';

      if ($needle === '') {
        // "start of subject preceded by a character" can never match
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // let preg_replace() write the number of replacements straight into $count
    return preg_replace($patterns, $replace, $subject, -1, $count);
  }
4688
4689
  /**
   * Limit the number of characters in a string, but also after the next word.
   *
   * A string that already fits into $length characters is returned unchanged.
   * Otherwise the string is cut so that no partial word stays at the end and
   * $strAddOn is appended; when the cut-off part contains no space at all,
   * the first ($length - 1) characters are kept instead.
   *
   * @param  string $str      The input string.
   * @param  int    $length   Maximum number of characters to inspect.
   * @param  string $strAddOn Suffix that is appended to a shortened string.
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls exactly onto a space: cut right before it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // substr() may report an error with "false", so normalize to a string before re-using it
    $str = (string)self::substr($str, 0, $length);

    // drop the (possibly incomplete) last word
    $words = explode(' ', $str);
    array_pop($words);
    $new_str = implode(' ', $words);

    if ($new_str === '') {
      // no space inside the limit: fall back to a hard cut
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    return $new_str . $strAddOn;
  }
4729
4730
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when $pad_length is not an integer, is not
   * positive, is smaller than the length of $str, or when $pad_string is empty.
   *
   * @param    string $str        The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with; this also prevents a division by zero below
      return $str;
    }

    // number of characters that have to be added in total
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        $repeat = (int)ceil($diff / $ps_length / 2);

        // the left side gets the smaller half, the right side the bigger one;
        // the halves are rounded explicitly so that substr() always receives an integer
        $pre = str_repeat($pad_string, $repeat);
        $pre = self::substr($pre, 0, (int)floor($diff / 2));
        $post = str_repeat($pad_string, $repeat);
        $post = self::substr($post, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4775
4776
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is handed over to
   * the native "str_repeat()".
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($str, $multiplier)
  {
    return \str_repeat(self::filter($str), $multiplier);
  }
4800
4801
  /**
   * INFO: this is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation to the native function, $count is filled by reference
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
4835
4836
  /**
   * Shuffles all the characters in the string.
   *
   * The string is broken up via self::split(), the resulting pieces are
   * shuffled and glued together again.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4851
4852
  /**
   * Sort all characters according to code points.
   *
   * The string is converted via self::codepoints(), sorted and converted back
   * via self::string().
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codepoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicated values
      $flipped = array_flip($codepoints);
      $codepoints = array_flip($flipped);
    }

    if (!$desc) {
      asort($codepoints);
    } else {
      arsort($codepoints);
    }

    return self::string($codepoints);
  }
4877
4878
  /**
   * Split a string into an array.
   *
   * The string is split with the grapheme-cluster pattern of the Grapheme
   * polyfill; for $len > 1 the pieces are joined into groups of $len.
   * A $len smaller than 1 is passed on to the native "str_split()".
   *
   * @param string $str The input string.
   * @param int    $len Number of pieces per array entry.
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    if ($len < 1) {
      return str_split($str, $len);
    }

    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // group the pieces and glue every group together again
    return array_map('implode', array_chunk($clusters, $len));
  }
4917
4918
  /**
   * Get a binary representation of a specific string.
   *
   * Every byte of the input is written as 8 bits and the leading zero bits of
   * the whole result are removed, so the output is the same as the former
   * "base_convert()" based result for short strings. Unlike "base_convert()"
   * the result stays exact for strings of any length.
   *
   * @param  string $str The input string.
   *
   * @return string The bits as a string of "0" and "1", "0" for an empty or all-zero input.
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '0';
    }

    // convert byte by byte: base_convert() silently loses precision on big numbers
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4933
4934
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     The input string, passed on unchanged.
   * @param string $unknown Passed on unchanged as second argument.
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    $ascii = self::to_ascii($str, $unknown);

    return $ascii;
  }
4948
4949
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split with a word pattern and the delimiters (the words)
   * are captured, so the words sit on every odd index of the split result.
   *
   * @param string $str      The input string.
   * @param int    $format   <strong>0</strong> => return a number of words<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist Additional chars that contains to words and do not start a new word (default: "'", "’")
   *
   * @return array|int The number of words in the string
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $len = count($strParts);

    // format 1: plain list of words
    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    // format 2: list of words, keyed by the character offset of each word
    if ($format === 2) {
      self::checkForSupport();

      $words = array();
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $words[$offset] = $strParts[$i];
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

      return $words;
    }

    // every other format: only the number of words
    return ($len - 1) / 2;
  }
4993
4994
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(), both strings are
   *       converted via UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5010
5011
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings are reported as equal right away; everything else is
   * normalized to NFD via "\Normalizer" and then compared with the native "strcmp()".
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    // shortcut: the string representations are already the same
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    return strcmp(
        \Normalizer::normalize($str1, \Normalizer::NFD),
        \Normalizer::normalize($str2, \Normalizer::NFD)
    );
  }
5028
5029
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      The input string.
   * @param string $charList The characters to look for.
   * @param int    $offset   Start position inside $str.
   * @param int    $length   Length of the part of $str that is inspected.
   *
   * @return int|null The number of characters before the first character of $charList,<br />
   *                  the length of the inspected string if none is found,<br />
   *                  or null if $charList is empty.
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // only cut the string when an offset or a non-default length was requested
    $str = ($offset || 2147483647 !== $length)
        ? (string)self::substr($str, $offset, $length)
        : (string)$str;

    // lazily capture everything in front of the first character of the list
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    return self::strlen($str);
  }
5058
5059
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints(), every entry is converted via UTF8::chr()
   *
   * @param  array $array Integer or Hexadecimal codepoints
   *
   * @return string UTF-8 encoded string
   */
  public static function string(array $array)
  {
    $result = '';

    foreach ($array as $codepoint) {
      $result .= self::chr($codepoint);
    }

    return $result;
  }
5080
5081
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str The input string, passed on unchanged.
   *
   * @return bool
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
5094
5095
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The string is compared against every key of self::$bom.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    // only the keys (the BOM strings) are needed here
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5112
5113
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * The string is passed through UTF8::clean() first and then handed over
   * to the native "strip_tags()".
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               You can use the optional second parameter to specify tags which should
   *                               not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8, then strip
    return \strip_tags(self::clean($str), $allowable_tags);
  }
5139
5140
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching, null is handled like 0
   *                           </p>
   * @param string  $encoding  The charset that is passed to "\mb_stripos()";
   *                           a boolean is accepted for old callers and is handled like "UTF-8".
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found or if haystack or needle is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalizeEncoding($encoding);
    }

    // "\mb_stripos()" expects an integer offset, so the default null is converted to 0
    return \mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
5188
5189
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * The search itself is done by "\mb_stristr()" with the encoding "UTF-8".
   *
   * @param string $str           The string that is searched.
   * @param string $needle        The string to search for.
   * @param bool   $before_needle Passed on unchanged to "\mb_stristr()".
   *
   * @return false|string sub-string, or false if needle is not found or empty
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle .= '';

    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    return \mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
5209
5210
  /**
   * Get the string length, not the byte-length!
   *
   * For the encodings "ASCII" and "CP850" the native "strlen()" is used,
   * everything else is counted by "\mb_strlen()".
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Set the charset for e.g. "\mb_" function
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int the number of characters in the string $str having character encoding $encoding. (One multi-byte character counted as +1)
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    // INFO: this is only a fallback for old versions
    $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        return strlen($str);
    }

    self::checkForSupport();

    // cleaning is only done for UTF-8 input
    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5250
5251
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp(), both strings are
   *       converted via UTF8::strtocasefold() and compared by UTF8::strnatcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5267
5268
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp(); strings that are not identical
   *       are converted via UTF8::strtonatfold() and compared by the native "strnatcmp()"
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   *
   * @return int Similar to other string comparison functions, this one returns &lt; 0 if
   * str1 is less than str2; &gt;
   * 0 if str1 is greater than
   * str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    // shortcut: the string representations are already the same
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5291
5292
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are converted via UTF8::strtocasefold() and compared by UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     The length of strings to be used in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i> is
   * greater than <i>str2</i>, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5315
5316
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut to their first $len characters via UTF8::substr()
   * and compared by UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     Number of characters to use in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i>
   * is greater than <i>str2</i>, and 0 if they are
   * equal.
   */
  public static function strncmp($str1, $str2, $len)
  {
    // substr() may report an error with "false", but strcmp() works on strings only
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5343
5344
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>
   *                          The string where char_list is looked for.
   *                          </p>
   * @param string $char_list <p>
   *                          The characters to look for. This parameter is case sensitive.
   *                          </p>
   *
   * @return string|false The part of haystack starting at the first character found,
   *                      or false if an argument is empty or nothing is found.
   */
  public static function strpbrk($haystack, $char_list)
  {
    $subject = (string)$haystack;
    $chars = (string)$char_list;

    // nothing to search in, or nothing to search for
    if (!isset($subject[0]) || !isset($chars[0])) {
      return false;
    }

    // self::rxClass() provides the pattern fragment for the character list
    // (presumably a regex character class - see its definition)
    $pattern = '/' . self::rxClass($chars) . '/us';

    if (!preg_match($pattern, $subject, $match)) {
      return false;
    }

    // locating and cutting are both done with the plain substr()/strpos(),
    // so the offset and the cut use the same unit
    return substr($subject, strpos($subject, $match[0]));
  }
5373
5374
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack.
   *                              Or a code point as int.
   *                              </p>
   * @param int        $offset    [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string     $encoding  [optional] Encoding name; it is only used by the mbstring code path.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found, or haystack / needle is empty, it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support integer $needle: turn a code point into
    // its character *before* the string cast below, otherwise the integer check
    // can never succeed (same order as in self::strrpos())
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalizeEncoding($encoding);
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['iconv'] === true) {
      // a negative offset is replaced by 0 here, to stay compatible
      // with older PHP releases (before 5.5.35 / 5.6.21 / 7.0.6)
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);

      // self::substr() returns false when the offset lies beyond the end of
      // the string: there is nothing left to search in
      if ($haystack === false) {
        return false;
      }
    }

    $bytePos = strpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // convert the byte position into a character position by measuring
    // everything in front of the match
    $left = substr($haystack, 0, $bytePos);

    // negative offset not supported in PHP strpos(), ignoring
    return ($offset > 0 ? $offset : 0) + self::strlen($left);
  }
5452
5453
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack
   *                         this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end,
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to UTF-8 and
   *                         is passed through self::normalizeEncoding() first.
   *                         </p>
   *
   * @return string|false the portion of haystack,
   *                      or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    return \mb_strrchr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5488
5489
  /**
   * Reverses characters order in the string.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    // self::split() breaks the input into pieces (presumably single characters),
    // which are then put back together in the opposite order
    $pieces = self::split($str);
    $reversed = array_reverse($pieces);

    return implode('', $reversed);
  }
5500
5501
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack
   *                         this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end,
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to UTF-8 and
   *                         is passed through self::normalizeEncoding() first.
   *                         </p>
   *
   * @return string|false the portion of haystack,
   *                      or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    return \mb_strrichr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5536
5537
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int|false The result of self::strrpos() on the lowercased arguments
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    // lowercasing both sides turns the case-sensitive search into a case-insensitive one
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
5550
5551
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked, for the last occurrence
   *                              of needle
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack.
   *                              Or a code point as int.
   *                              </p>
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters into
   *                              the string. Negative values will stop searching at an arbitrary point
   *                              prior to the end of the string.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false the numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found, or haystack / needle
   *                   is empty, it returns false.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // a non-negative integer needle is a code point: replace it by its character
    if ($needle === ((int)$needle) && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    if (!isset($haystack[0]) || !isset($needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback

    // a positive offset cuts characters off the front, a negative one off the end
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // measure everything in front of the match to get a character position,
    // then add back what a positive offset removed from the front
    $prefix = substr($haystack, 0, $bytePos);

    return ($offset > 0 ? $offset : 0) + self::strlen($prefix);
  }
5626
5627
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str
   * @param string $mask
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null Length of the matching initial segment; 0 if the string does not start with a mask character
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // only cut the subject when something other than "the whole string" was requested
    if ($offset || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    // anchored at the start: one or more characters accepted by self::rxClass($mask)
    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5646
5647
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      <p>
   *                              The input string. Must be valid UTF-8.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to look for. Must be valid UTF-8.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, grapheme_strstr() returns the part of the
   *                              haystack before the first occurrence of the needle (excluding the needle).
   *                              </p>
   *
   * @return string|false the portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    // the work itself is delegated to grapheme_strstr()
    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5671
5672
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str
   * @param bool   $full When true, the "caseFolding_full" replacement table
   *                     (loaded via self::getData()) is applied as well.
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup tables are built on first use and kept for later calls
    static $commonKeys = null;
    static $commonValues = null;
    static $fullTable = null;

    if ($commonKeys === null) {
      $commonKeys = array_keys(self::$commonCaseFold);
      $commonValues = array_values(self::$commonCaseFold);
    }

    // step 1: replacements from self::$commonCaseFold
    $folded = str_replace($commonKeys, $commonValues, $str);

    // step 2 (optional): replacements from the full table,
    // index 0 = search list, index 1 = replacement list
    if ($full) {
      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    // step 3: clean the result and lowercase it
    return self::strtolower(self::clean($folded));
  }
5709
5710
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      <p>
   *                         The string being lowercased.
   *                         </p>
   * @param string $encoding [optional] Encoding name, passed through self::normalizeEncoding().
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // empty input needs no conversion
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    return \mb_strtolower($str, self::normalizeEncoding($encoding));
  }
5737
5738
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * @param string $s
   *
   * @return string The NFD-normalized input without its non-spacing marks
   */
  protected static function strtonatfold($s)
  {
    // decompose first (NFD) ...
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    // ... then drop every run of non-spacing marks (\p{Mn})
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5749
5750
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding [optional] Encoding name; it is only used by the mbstring code path.
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // search / replace lists for the non-mbstring path, built on first use
    static $searchList = null;
    static $replaceList = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, self::normalizeEncoding($encoding));
    }

    // fallback: plain string replacement driven by self::case_table()

    if ($searchList === null) {
      $table = self::case_table();
      $searchList = array_keys($table);
      $replaceList = array_values($table);
    }

    return str_replace($searchList, $replaceList, self::clean($str));
  }
5795
5796
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  <p>
   *                           The string being translated.
   *                           </p>
   * @param string|array $from <p>
   *                           The string replacing from. If $to is omitted, this value
   *                           is handed to PHP's strtr() as its second argument unchanged.
   *                           </p>
   * @param string|array $to   <p>
   *                           The string being translated to. The default (INF) means
   *                           "not given".
   *                           </p>
   *
   * @return string This function returns a copy of str,
   * translating all occurrences of each character in
   * from to the corresponding character in
   * to.
   */
  public static function strtr($str, $from, $to = INF)
  {
    // two-argument form: nothing to prepare
    if (INF === $to) {
      return strtr($str, $from);
    }

    // three-argument form: split both sides, trim the longer one to the length
    // of the shorter one and pair the pieces up
    $fromParts = self::str_split($from);
    $toParts = self::str_split($to);

    $fromCount = count($fromParts);
    $toCount = count($toParts);

    if ($fromCount > $toCount) {
      $fromParts = array_slice($fromParts, 0, $toCount);
    } elseif ($toCount > $fromCount) {
      $toParts = array_slice($toParts, 0, $fromCount);
    }

    return strtr($str, array_combine($fromParts, $toParts));
  }
5835
5836
  /**
   * Return the width of a string.
   *
   * @param string $s
   *
   * @return int The result of mb_strwidth() for the string, read as UTF-8
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5850
5851
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>
   *                           The string being checked.
   *                           </p>
   * @param int     $start     <p>
   *                           The first position used in str.
   *                           </p>
   * @param int     $length    [optional] <p>
   *                           The maximum length of the returned string.
   *                           </p>
   * @param string  $encoding  [optional] Encoding name; it is only used by the mbstring code path.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string|false Returns a sub-string specified by the start and length parameters;
   *                      an empty string for empty input; false if a non-zero start is
   *                      greater than the length of the string.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr

      $str = self::clean($str);
    }

    // the character count is only needed to validate $start or to default $length
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    // start position behind the end of the string
    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($chars, $start, $length));
  }
5927
5928
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
   *                                    end of the string.
   * @param int     $length             The length of the comparison. The default value is the largest of the length of
   *                                    the str compared to the length of main_str less the offset.
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int The result of self::strcasecmp() (case-insensitive) or self::strcmp() for the two parts
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // self::substr() returns false when a non-zero $offset lies beyond the end of
    // the string; normalize that to '' so that only strings reach the helpers below
    $main_str = (string)self::substr($main_str, $offset, $length);

    // the second string is cut down to the length of the extracted part
    $str = (string)self::substr($str, 0, self::strlen($main_str));

    if ($case_insensitivity === true) {
      return self::strcasecmp($main_str, $str);
    }

    return self::strcmp($main_str, $str);
  }
5948
5949
  /**
   * Count the number of substring occurrences in a UTF-8 string.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack <p>
   *                           The string to search in
   *                           </p>
   * @param string   $needle   <p>
   *                           The substring to search for
   *                           </p>
   * @param int      $offset   [optional] <p>
   *                           The character offset where to start counting
   *                           </p>
   * @param int|null $length   [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring; null searches up to the end of the haystack.
   *                           </p>
   *
   * @return int|false The number of occurrences, or false when haystack or needle is empty
   *                   or when offset plus length is not greater than zero.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      if ($length === null) {
        // Only an offset was given. Casting the missing length to 0 (as was done
        // before) cut the haystack down to an empty string, so the result was
        // always 0; search from the offset to the end of the string instead.
        if ($offset <= 0) {
          return false;
        }

        $haystack = self::substr($haystack, $offset);
      } else {
        $length = (int)$length;

        if ($length + $offset <= 0) {
          return false;
        }

        $haystack = self::substr($haystack, $offset, $length);
      }
    }

    self::checkForSupport();

    // self::substr() is reported to return false in some cases; normalise to a string.
    return \mb_substr_count((string)$haystack, $needle);
  }
5995
5996
  /**
   * Replace text within a portion of a string (character based, not byte based).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * When $str is an array, every element is processed on its own and the other
   * arguments are expanded (or truncated) to one value per element:
   * - a missing $length becomes 0 for every element (the replacement is inserted),
   * - non-integer entries of a $start array become 0,
   * - non-integer, non-null entries of a $length array become count($str).
   *
   * When $str is a string and $replacement is an array, only its first element
   * is used (an empty array counts as an empty string).
   *
   * @param string|array   $str
   * @param string|array   $replacement
   * @param int|array      $start
   * @param null|int|array $length null means "up to the end of the string" for a string input
   *
   * @return array|string
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement: one value per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one value per input string
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one value per input string
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, element by element
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both strings into arrays of UTF-8 characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      self::checkForSupport();

      $length = \mb_strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // separator first: the legacy "array first" argument order is deprecated since PHP 7.4
    return implode('', $smatches[0]);
  }
6073
6074
  /**
   * Swap the case of every non-whitespace character: characters that are
   * already upper case become lower case, all others become upper case.
   *
   * @param string $str
   * @param string $encoding
   *
   * @return string the string with each character's case swapped
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoding = self::normalizeEncoding($encoding);
    $str = self::clean($str);

    $swapSingleChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // unchanged by strtoupper() => lower-case it, otherwise keep the upper-cased form
      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapSingleChar, $str);
  }
6109
6110
  /**
   * Alias of "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         The input string, e.g. a UTF-8 string
   * @param string $subst_chr Forwarded as the "unknown character" replacement
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
6124
6125
  /**
   * Alias of "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
6138
6139
  /**
   * Alias of "UTF8::to_utf8()".
   *
   * @see UTF8::to_utf8()
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
6152
6153
  /**
   * Convert a UTF-8 string to ASCII.
   *
   * If the intl extension is flagged as available (and PHP >= 5.4), the string is
   * transliterated first; whatever is still non-ASCII afterwards is mapped through
   * the lookup tables in "data/xNN.php" (one file per 256 code points).
   *
   * @param string $str     The input string.
   * @param string $unknown Character used if no ASCII replacement is known. (default is ?)
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?')
  {
    // lookup tables, loaded lazily and cached for the lifetime of the request
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    self::checkForSupport();
    if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
      $transliterated = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

      // transliterator_transliterate() returns false on failure; keep the
      // original string in that case and let the lookup tables do the work
      if ($transliterated !== false) {
        $str = $transliterated;
      }

      // check again, if we only have ASCII, now ...
      if (!preg_match("/[\x80-\xFF]/", $str)) {
        return $str;
      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Reset per character: without this a stale code point from a previous
      // iteration would be reused for a character that cannot be decoded.
      $ord = null;

      // decode the code point from the lead byte and its continuation bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // lead bytes 128-191 and 254-255 cannot start a sequence
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // presumably the included file fills $UTF8_TO_ASCII[$bank] - verify against the data files
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        // no table file, or the file did not define this bank: remember that
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference so later writes to $c cannot touch the last element
    unset($c);

    return implode('', $chars);
  }
6264
6265
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @see UTF8::to_win1252()
   *
   * @param string|array $str
   *
   * @return array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6278
6279
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @see UTF8::to_win1252()
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6292
6293
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte-level conversion, "\uXXXX" escape sequences and numeric
   * entities with two to four digits ("&#252;") are decoded as well.
   *
   * @param string|array $str Any string or array (arrays are converted element by element).
   *
   * @return string|array The same string (or array), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 7-bit ASCII: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      // How many continuation bytes would follow if $c1 started a UTF-8 sequence?
      if ($ordC1 < 0xC0) {
        // 0x80-0xBF on its own: needs conversion
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
          continue;
        }
        $trailing = 0;
      } elseif ($ordC1 <= 0xDF) { // looks like 2 bytes UTF8
        $trailing = 1;
      } elseif ($ordC1 <= 0xEF) { // looks like 3 bytes UTF8
        $trailing = 2;
      } elseif ($ordC1 <= 0xF7) { // looks like 4 bytes UTF8
        $trailing = 3;
      } else { // doesn't look like UTF8, but should be converted
        $trailing = 0;
      }

      // The sequence is accepted only if every expected continuation byte
      // exists and lies in 0x80-0xBF.
      $seq = $c1;
      $isSequence = $trailing > 0 && $i + $trailing < $max;
      for ($j = 1; $isSequence && $j <= $trailing; $j++) {
        $cn = $str[$i + $j];
        $isSequence = $cn >= "\x80" && $cn <= "\xbf";
        $seq .= $cn;
      }

      if ($isSequence) { // yeah, almost sure it's UTF8 already
        $buf .= $seq;
        $i += $trailing;
      } else { // not valid UTF8 - treat the byte as ISO-8859-1 and encode it as two bytes
        // integer shift instead of "/ 64": chr() must not be fed a fractional float
        $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    // NOTE(review): the 'HTML-ENTITIES' mbstring encoding is deprecated as of PHP 8.2 - plan a replacement
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6420
6421
  /**
   * Convert a string (or every element of an array) into "win1252"-encoding.
   *
   * Arrays are converted recursively with their keys kept; an empty string
   * yields an empty string; everything else is delegated to utf8_decode().
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  protected static function to_win1252($str)
  {
    if (is_array($str)) {
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $str[$key] = self::to_win1252($value);
      }

      return $str;
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
6448
6449
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original function if we use <= 7-Bit in the string / chars,
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * Without a character list, all characters of the Unicode categories
   * "separator" (\pZ) and "other" (\pC) are stripped from both ends.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // The string '0' is falsy in PHP but is a legitimate character list, so it
    // must not be mistaken for "no characters given".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6477
6478
  /**
   * Makes string's first char uppercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string (empty if the first character cannot be extracted)
   */
  public static function ucfirst($str)
  {
    $firstChar = self::substr($str, 0, 1);

    // substr() is reported to return false in some cases; do not hand that
    // to strtoupper(), which expects a string
    if ($firstChar === false) {
      return '';
    }

    return self::strtoupper($firstChar) . (string)self::substr($str, 1);
  }
6489
6490
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word
   *
   * @return string
   */
  public static function ucword($word)
  {
    $capitalized = self::ucfirst($word);

    return $capitalized;
  }
6503
6504
  /**
   * Uppercase the first character of all words in the string.
   *
   * Words are separated by a single space. Words listed in $exceptions (strict
   * comparison) are left untouched, but the first character of the whole
   * result is always upper-cased.
   *
   * @param  string $str
   * @param array   $exceptions Words that must not be changed
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // explicit emptiness check: a plain "!$str" would also discard the string '0'
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (explode(' ', $str) as $word) {
      if (!$useExceptions || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }
      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
6545
6546
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // first pass: plain url-decoding, then turn "%uXXXX" into hex entities
    $urlDecoded = urldecode($str);
    $str = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $urlDecoded);

    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // second pass: normalise to UTF-8, decode entities, raw-url-decode, repair broken UTF-8
    $asUtf8 = self::to_utf8($str);
    $entitiesDecoded = self::html_entity_decode($asUtf8, $flags);
    $rawDecoded = rawurldecode($entitiesDecoded);
    $fixed = self::fix_simple_utf8($rawDecoded);

    return (string)$fixed;
  }
6586
6587
  /**
6588
   * Return an array with "urlencoded"-win1252 -> UTF-8
6589
   *
6590
   * @return mixed
6591
   */
6592 1
  public static function urldecode_fix_win1252_chars()
6593
  {
6594
    static $array = array(
6595
        '%20' => ' ',
6596
        '%21' => '!',
6597
        '%22' => '"',
6598
        '%23' => '#',
6599
        '%24' => '$',
6600
        '%25' => '%',
6601
        '%26' => '&',
6602
        '%27' => "'",
6603
        '%28' => '(',
6604
        '%29' => ')',
6605
        '%2A' => '*',
6606
        '%2B' => '+',
6607
        '%2C' => ',',
6608
        '%2D' => '-',
6609
        '%2E' => '.',
6610
        '%2F' => '/',
6611
        '%30' => '0',
6612
        '%31' => '1',
6613
        '%32' => '2',
6614
        '%33' => '3',
6615
        '%34' => '4',
6616
        '%35' => '5',
6617
        '%36' => '6',
6618
        '%37' => '7',
6619
        '%38' => '8',
6620
        '%39' => '9',
6621
        '%3A' => ':',
6622
        '%3B' => ';',
6623
        '%3C' => '<',
6624
        '%3D' => '=',
6625
        '%3E' => '>',
6626
        '%3F' => '?',
6627
        '%40' => '@',
6628
        '%41' => 'A',
6629
        '%42' => 'B',
6630
        '%43' => 'C',
6631
        '%44' => 'D',
6632
        '%45' => 'E',
6633
        '%46' => 'F',
6634
        '%47' => 'G',
6635
        '%48' => 'H',
6636
        '%49' => 'I',
6637
        '%4A' => 'J',
6638
        '%4B' => 'K',
6639
        '%4C' => 'L',
6640
        '%4D' => 'M',
6641
        '%4E' => 'N',
6642
        '%4F' => 'O',
6643
        '%50' => 'P',
6644
        '%51' => 'Q',
6645
        '%52' => 'R',
6646
        '%53' => 'S',
6647
        '%54' => 'T',
6648
        '%55' => 'U',
6649
        '%56' => 'V',
6650
        '%57' => 'W',
6651
        '%58' => 'X',
6652
        '%59' => 'Y',
6653
        '%5A' => 'Z',
6654
        '%5B' => '[',
6655
        '%5C' => '\\',
6656
        '%5D' => ']',
6657
        '%5E' => '^',
6658
        '%5F' => '_',
6659
        '%60' => '`',
6660
        '%61' => 'a',
6661
        '%62' => 'b',
6662
        '%63' => 'c',
6663
        '%64' => 'd',
6664
        '%65' => 'e',
6665
        '%66' => 'f',
6666
        '%67' => 'g',
6667
        '%68' => 'h',
6668
        '%69' => 'i',
6669
        '%6A' => 'j',
6670
        '%6B' => 'k',
6671
        '%6C' => 'l',
6672
        '%6D' => 'm',
6673
        '%6E' => 'n',
6674
        '%6F' => 'o',
6675
        '%70' => 'p',
6676
        '%71' => 'q',
6677
        '%72' => 'r',
6678
        '%73' => 's',
6679
        '%74' => 't',
6680
        '%75' => 'u',
6681
        '%76' => 'v',
6682
        '%77' => 'w',
6683
        '%78' => 'x',
6684
        '%79' => 'y',
6685
        '%7A' => 'z',
6686
        '%7B' => '{',
6687
        '%7C' => '|',
6688
        '%7D' => '}',
6689
        '%7E' => '~',
6690
        '%7F' => '',
6691
        '%80' => '`',
6692
        '%81' => '',
6693
        '%82' => '‚',
6694
        '%83' => 'ƒ',
6695
        '%84' => '„',
6696
        '%85' => '…',
6697
        '%86' => '†',
6698
        '%87' => '‡',
6699
        '%88' => 'ˆ',
6700
        '%89' => '‰',
6701
        '%8A' => 'Š',
6702
        '%8B' => '‹',
6703
        '%8C' => 'Œ',
6704
        '%8D' => '',
6705
        '%8E' => 'Ž',
6706
        '%8F' => '',
6707
        '%90' => '',
6708
        '%91' => '‘',
6709
        '%92' => '’',
6710
        '%93' => '“',
6711
        '%94' => '”',
6712
        '%95' => '•',
6713
        '%96' => '–',
6714
        '%97' => '—',
6715
        '%98' => '˜',
6716
        '%99' => '™',
6717
        '%9A' => 'š',
6718
        '%9B' => '›',
6719
        '%9C' => 'œ',
6720
        '%9D' => '',
6721
        '%9E' => 'ž',
6722
        '%9F' => 'Ÿ',
6723
        '%A0' => '',
6724
        '%A1' => '¡',
6725
        '%A2' => '¢',
6726
        '%A3' => '£',
6727
        '%A4' => '¤',
6728
        '%A5' => '¥',
6729
        '%A6' => '¦',
6730
        '%A7' => '§',
6731
        '%A8' => '¨',
6732
        '%A9' => '©',
6733
        '%AA' => 'ª',
6734
        '%AB' => '«',
6735
        '%AC' => '¬',
6736
        '%AD' => '',
6737
        '%AE' => '®',
6738
        '%AF' => '¯',
6739
        '%B0' => '°',
6740
        '%B1' => '±',
6741
        '%B2' => '²',
6742
        '%B3' => '³',
6743
        '%B4' => '´',
6744
        '%B5' => 'µ',
6745
        '%B6' => '¶',
6746
        '%B7' => '·',
6747
        '%B8' => '¸',
6748
        '%B9' => '¹',
6749
        '%BA' => 'º',
6750
        '%BB' => '»',
6751
        '%BC' => '¼',
6752
        '%BD' => '½',
6753
        '%BE' => '¾',
6754
        '%BF' => '¿',
6755
        '%C0' => 'À',
6756
        '%C1' => 'Á',
6757
        '%C2' => 'Â',
6758
        '%C3' => 'Ã',
6759
        '%C4' => 'Ä',
6760
        '%C5' => 'Å',
6761
        '%C6' => 'Æ',
6762
        '%C7' => 'Ç',
6763
        '%C8' => 'È',
6764
        '%C9' => 'É',
6765
        '%CA' => 'Ê',
6766
        '%CB' => 'Ë',
6767
        '%CC' => 'Ì',
6768
        '%CD' => 'Í',
6769
        '%CE' => 'Î',
6770
        '%CF' => 'Ï',
6771
        '%D0' => 'Ð',
6772
        '%D1' => 'Ñ',
6773
        '%D2' => 'Ò',
6774
        '%D3' => 'Ó',
6775
        '%D4' => 'Ô',
6776
        '%D5' => 'Õ',
6777
        '%D6' => 'Ö',
6778
        '%D7' => '×',
6779
        '%D8' => 'Ø',
6780
        '%D9' => 'Ù',
6781
        '%DA' => 'Ú',
6782
        '%DB' => 'Û',
6783
        '%DC' => 'Ü',
6784
        '%DD' => 'Ý',
6785
        '%DE' => 'Þ',
6786
        '%DF' => 'ß',
6787
        '%E0' => 'à',
6788
        '%E1' => 'á',
6789
        '%E2' => 'â',
6790
        '%E3' => 'ã',
6791
        '%E4' => 'ä',
6792
        '%E5' => 'å',
6793
        '%E6' => 'æ',
6794
        '%E7' => 'ç',
6795
        '%E8' => 'è',
6796
        '%E9' => 'é',
6797
        '%EA' => 'ê',
6798
        '%EB' => 'ë',
6799
        '%EC' => 'ì',
6800
        '%ED' => 'í',
6801
        '%EE' => 'î',
6802
        '%EF' => 'ï',
6803
        '%F0' => 'ð',
6804
        '%F1' => 'ñ',
6805
        '%F2' => 'ò',
6806
        '%F3' => 'ó',
6807
        '%F4' => 'ô',
6808
        '%F5' => 'õ',
6809
        '%F6' => 'ö',
6810
        '%F7' => '÷',
6811
        '%F8' => 'ø',
6812
        '%F9' => 'ù',
6813
        '%FA' => 'ú',
6814
        '%FB' => 'û',
6815
        '%FC' => 'ü',
6816
        '%FD' => 'ý',
6817
        '%FE' => 'þ',
6818
        '%FF' => 'ÿ',
6819 1
    );
6820
6821 1
    return $array;
6822
  }
6823
6824
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through to_utf8(); the keys of self::$utf8ToWin1252
   * are then replaced by their values before the result is handed to Xml::utf8_decode().
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // search / replace lists, built once from self::$utf8ToWin1252
    static $searchList = null;
    static $replaceList = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $str = self::to_utf8($str);

    if ($searchList === null) {
      $searchList = array_keys(self::$utf8ToWin1252);
      $replaceList = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($searchList, $replaceList, $str);

    return Xml::utf8_decode($mapped);
  }
6854
6855
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * If the encoded result contains the byte 0xC2, the keys of self::$cp1252ToUtf8
   * are additionally replaced by their values (presumably to map the encoded
   * 0x80-0x9F range onto the Windows-1252 characters - verify against that table).
   *
   * NOTE(review): \utf8_encode() is deprecated as of PHP 8.2.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists, built once from self::$cp1252ToUtf8
    static $searchList = null;
    static $replaceList = null;

    $encoded = \utf8_encode($str);

    // without a 0xC2 byte there is nothing to fix up
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($searchList === null) {
      $searchList = array_keys(self::$cp1252ToUtf8);
      $replaceList = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($searchList, $replaceList, $encoded);
  }
6881
6882
  /**
   * Fixes UTF-8 strings containing mis-converted Windows-1252 characters.
   *
   * Intended for UTF-8 input that was produced by treating Windows-1252 text as
   * ISO-8859-1 (i.e. ignoring the Windows-1252 characters from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to UTF8::fix_simple_utf8().
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6899
6900
  /**
   * Returns the table of all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array the known whitespace characters as values, keyed by the type of whitespace
   *         as defined in the URL above
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6914
6915
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. When the string holds more
   * than $words of them, it is cut after the last allowed word, trailing whitespace
   * is trimmed and $strAddOn is appended. Otherwise the string is returned unchanged.
   *
   * @param  string $str
   * @param  int    $words    Maximum number of words to keep; values below 1 yield an empty string.
   * @param  string $strAddOn Suffix appended only when the string was actually shortened.
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $limit = (int)$words;

    if ($limit <= 0) {
      return '';
    }

    // Optional leading whitespace, then up to $limit words, each with its trailing whitespace.
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $str, $matches);

    // Only shorten when something matched and the match does not already cover the whole string.
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
6950
6951
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is translated into a single-byte mask (one mask byte per element
   * returned by self::split(), plus one '#' per existing $break), the native
   * wordwrap() is run on that mask, and the break positions found in the wrapped
   * mask are then replayed onto the original characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   The input string.
   * @param int    $width [optional] The column width, passed through to the native wordwrap().
   * @param string $break [optional] The string used to break lines.
   * @param bool   $cut   [optional] Passed through to the native wordwrap(): if true, a word
   *                      that is larger than the given width is broken apart.
   *
   * @return string the given string wrapped at the specified column, or an empty string
   *                if $str or $break is empty.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // Build the list of original characters and a parallel mask:
    // '#' marks an existing break, ' ' a space, '?' any other character.
    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $segmentIndex => $segment) {

      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    // Let the native function decide where lines break; it marks each break with '#'.
    $mask = wordwrap($mask, $width, '#', $cut);

    $strReturn = '';
    $charIndex = 0;  // position in $chars
    $maskIndex = -1; // position in $mask
    $breakPos = -1;  // position of the current '#' in $mask

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
      // Copy every character that sits before the current break marker.
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $strReturn .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // A marker that stands for an existing break or a replaced space consumes that
      // character; a marker inserted by $cut has no counterpart in $chars.
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $strReturn .= $break;
    }

    // Whatever was not consumed above belongs to the last line.
    return $strReturn . implode('', $chars);
  }
7030
7031
  /**
   * Returns the table of Unicode White Space characters.
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7040
7041
}
7042