Completed
Push — master ( d1c37c...590746 )
by Lars
03:20
created

UTF8::isBinary()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 4
ccs 0
cts 2
cp 0
rs 10
cc 1
eloc 2
nc 1
nop 1
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  protected static $bom = array(
      // Keys are raw BOM byte sequences, values are the number of bytes in that key.
      // Order is kept as-is: the 4-byte UTF-32 (LE) mark precedes the 2-byte
      // UTF-16 (LE) mark that is a prefix of it.
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // The bytes EF BB BF read as WINDOWS-1252 are U+00EF U+00BB U+00BF; re-encoded
      // as UTF-8 each of those takes two bytes. Written as byte escapes so the key
      // does not depend on the encoding of this file or on tools that strip BOM-like text.
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  protected static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    //HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  protected static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  protected static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
232
   * @var array
233
   */
234
  protected static $brokenUtf8ToUtf8 = array(
235
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
236
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
237
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
238
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
239
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
240
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
241
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
242
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
243
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
244
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
245
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
246
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
247
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
248
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
249
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
250
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
251
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
252
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
253
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
254
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
255
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
256
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
257
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
258
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
259
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
260
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
261
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
262
      'ü'       => 'ü',
263
      'ä'       => 'ä',
264
      'ö'       => 'ö',
265
      'Ö'       => 'Ö',
266
      'ß'       => 'ß',
267
      'Ã '       => 'à',
268
      'á'       => 'á',
269
      'â'       => 'â',
270
      'ã'       => 'ã',
271
      'ù'       => 'ù',
272
      'ú'       => 'ú',
273
      'û'       => 'û',
274
      'Ù'       => 'Ù',
275
      'Ú'       => 'Ú',
276
      'Û'       => 'Û',
277
      'Ü'       => 'Ü',
278
      'ò'       => 'ò',
279
      'ó'       => 'ó',
280
      'ô'       => 'ô',
281
      'è'       => 'è',
282
      'é'       => 'é',
283
      'ê'       => 'ê',
284
      'ë'       => 'ë',
285
      'À'       => 'À',
286
      'Á'       => 'Á',
287
      'Â'       => 'Â',
288
      'Ã'       => 'Ã',
289
      'Ä'       => 'Ä',
290
      'Ã…'       => 'Å',
291
      'Ç'       => 'Ç',
292
      'È'       => 'È',
293
      'É'       => 'É',
294
      'Ê'       => 'Ê',
295
      'Ë'       => 'Ë',
296
      'ÃŒ'       => 'Ì',
297
      'Í'       => 'Í',
298
      'ÃŽ'       => 'Î',
299
      'Ï'       => 'Ï',
300
      'Ñ'       => 'Ñ',
301
      'Ã’'       => 'Ò',
302
      'Ó'       => 'Ó',
303
      'Ô'       => 'Ô',
304
      'Õ'       => 'Õ',
305
      'Ø'       => 'Ø',
306
      'Ã¥'       => 'å',
307
      'æ'       => 'æ',
308
      'ç'       => 'ç',
309
      'ì'       => 'ì',
310
      'í'       => 'í',
311
      'î'       => 'î',
312
      'ï'       => 'ï',
313
      'ð'       => 'ð',
314
      'ñ'       => 'ñ',
315
      'õ'       => 'õ',
316
      'ø'       => 'ø',
317
      'ý'       => 'ý',
318
      'ÿ'       => 'ÿ',
319
      '€'      => '€',
320
  );
321
322
  /**
323
   * @var array
324
   */
325
  protected static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  protected static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  protected static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790 1
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792 1
      'ISO-IR-230',
793 1
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803 1
804
  /**
805
   * __construct()
806
   */
807 1
  public function __construct()
  {
    // Creating an instance delegates straight to the static checkForSupport() routine.
    self::checkForSupport();
  }
811
812
  /**
813
   * Return the character at the specified position: $str[1] like functionality.
814
   *
815
   * @param    string $str A UTF-8 string.
816
   * @param    int    $pos The position of character to return.
817
   *
818
   * @return   string Single Multi-Byte character.
819
   */
820
  public static function access($str, $pos)
  {
    // The character at $pos is simply the length-1 substring that starts at $pos.
    $character = self::substr($str, $pos, 1);

    return $character;
  }
824
825
  /**
826
   * Prepends UTF-8 BOM character to the string and returns the whole string.
827
   *
828
   * INFO: If BOM already existed there, the Input string is returned.
829
   *
830
   * @param    string $str The input string
831
   *
832
   * @return   string The output string that contains BOM
833 2
   */
834
  public static function add_bom_to_string($str)
  {
    // Anything other than a strict "false" from string_has_bom() means the
    // input is returned exactly as it was given.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    // No BOM detected: put the UTF-8 BOM in front of the input.
    return self::bom() . $str;
  }
842
843
  /**
844
   * Convert binary into a string.
845
   *
846 1
   * @param mixed $bin 1|0
847
   *
848 1
   * @return string
849
   */
850
  public static function binary_to_str($bin)
  {
    // Step 1: re-express the base-2 digits as base-16 digits.
    // NOTE(review): the PHP manual warns that base_convert() may lose precision
    // on large numbers, so long bit strings may not convert exactly - confirm
    // that callers only pass short input.
    $hex = base_convert($bin, 2, 16);

    // Step 2: pack('H*') turns the hex digits (high nibble first) into raw bytes.
    return pack('H*', $hex);
  }
854
855
  /**
856
   * Returns the UTF-8 Byte Order Mark Character.
857
   *
858
   * @return string UTF-8 Byte Order Mark
859
   */
860
  public static function bom()
  {
    // The three bytes EF BB BF form the UTF-8 byte order mark.
    $byteOrderMark = "\xEF\xBB\xBF";

    return $byteOrderMark;
  }
864
865
  /**
866
   * @alias of UTF8::chr_map()
867
   * @see   UTF8::chr_map()
868
   *
869
   * @param string|array $callback
870
   * @param string       $str
871
   *
872
   * @return array
873
   */
874
  public static function callback($callback, $str)
  {
    // Pure alias: both arguments are forwarded, in the same order, to chr_map().
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
878
879
  /**
880
   * Returns an array of all lower and upper case UTF-8 encoded characters.
881
   *
882
   * @return   array An array with lower case chars as keys and upper chars as values.
883
   */
884
  protected static function case_table()
885
  {
886
    static $case = array(
887
888
      // lower => upper
889
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
890
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
891
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
892
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
893
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
894
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
895
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
896
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
897
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
898
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
899
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
900
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
901
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
902
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
903
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
904
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
905
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
906
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
907
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
908
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
909
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
910
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
911
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
912
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
913
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
914
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
915
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
916
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
917
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
918
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
919
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
920
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
921
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
922
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
923
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
924
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
925
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
926
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
927
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
928
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
929
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
930
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
931
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
932
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
933
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
934
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
935
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
936
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
937
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
938
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
939
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
940
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
941
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
942
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
943
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
944
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
945
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
946
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
947
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
948
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
949
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
950
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
951
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
952
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
953
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
954
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
955
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
956
      "\xea\x9e\x87"     => "\xea\x9e\x86",
957
      "\xea\x9e\x85"     => "\xea\x9e\x84",
958
      "\xea\x9e\x83"     => "\xea\x9e\x82",
959
      "\xea\x9e\x81"     => "\xea\x9e\x80",
960
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
961
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
962
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
963
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
964
      "\xea\x9d\xad"     => "\xea\x9d\xac",
965
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
966
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
967
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
968
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
969
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
970
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
971
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
972
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
973
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
974
      "\xea\x9d\x99"     => "\xea\x9d\x98",
975
      "\xea\x9d\x97"     => "\xea\x9d\x96",
976
      "\xea\x9d\x95"     => "\xea\x9d\x94",
977
      "\xea\x9d\x93"     => "\xea\x9d\x92",
978
      "\xea\x9d\x91"     => "\xea\x9d\x90",
979
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
980
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
981
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
982
      "\xea\x9d\x89"     => "\xea\x9d\x88",
983
      "\xea\x9d\x87"     => "\xea\x9d\x86",
984
      "\xea\x9d\x85"     => "\xea\x9d\x84",
985
      "\xea\x9d\x83"     => "\xea\x9d\x82",
986
      "\xea\x9d\x81"     => "\xea\x9d\x80",
987
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
988
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
989
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
990
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
991
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
992
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
993
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
994
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
995
      "\xea\x9c\xad"     => "\xea\x9c\xac",
996
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
997
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
998
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
999
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
1000
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
1001
      "\xea\x9a\x97"     => "\xea\x9a\x96",
1002
      "\xea\x9a\x95"     => "\xea\x9a\x94",
1003
      "\xea\x9a\x93"     => "\xea\x9a\x92",
1004
      "\xea\x9a\x91"     => "\xea\x9a\x90",
1005
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
1006
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
1007
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
1008
      "\xea\x9a\x89"     => "\xea\x9a\x88",
1009
      "\xea\x9a\x87"     => "\xea\x9a\x86",
1010
      "\xea\x9a\x85"     => "\xea\x9a\x84",
1011
      "\xea\x9a\x83"     => "\xea\x9a\x82",
1012
      "\xea\x9a\x81"     => "\xea\x9a\x80",
1013
      "\xea\x99\xad"     => "\xea\x99\xac",
1014
      "\xea\x99\xab"     => "\xea\x99\xaa",
1015
      "\xea\x99\xa9"     => "\xea\x99\xa8",
1016
      "\xea\x99\xa7"     => "\xea\x99\xa6",
1017
      "\xea\x99\xa5"     => "\xea\x99\xa4",
1018
      "\xea\x99\xa3"     => "\xea\x99\xa2",
1019
      "\xea\x99\x9f"     => "\xea\x99\x9e",
1020
      "\xea\x99\x9d"     => "\xea\x99\x9c",
1021
      "\xea\x99\x9b"     => "\xea\x99\x9a",
1022
      "\xea\x99\x99"     => "\xea\x99\x98",
1023
      "\xea\x99\x97"     => "\xea\x99\x96",
1024
      "\xea\x99\x95"     => "\xea\x99\x94",
1025
      "\xea\x99\x93"     => "\xea\x99\x92",
1026
      "\xea\x99\x91"     => "\xea\x99\x90",
1027
      "\xea\x99\x8f"     => "\xea\x99\x8e",
1028
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1029
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1030
      "\xea\x99\x89"     => "\xea\x99\x88",
1031
      "\xea\x99\x87"     => "\xea\x99\x86",
1032
      "\xea\x99\x85"     => "\xea\x99\x84",
1033
      "\xea\x99\x83"     => "\xea\x99\x82",
1034
      "\xea\x99\x81"     => "\xea\x99\x80",
1035
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1036
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1037
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1038
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1039
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1040
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1041
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1042
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1043
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1044
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1045
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1046
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1047
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1048
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1049
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1050
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1051
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1052
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1053
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1054
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1055
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1056
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1057
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1058
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1059
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1060
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1061
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1062
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1063
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1064
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1065
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1066
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1067
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1068
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1069
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1070
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1071
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1072
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1073
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1074
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1075
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1076
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1077
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1078
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1079
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1080
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1081
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1082
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1083
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1084
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1085
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1086
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1087
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1088
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1089
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1090
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1091
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1092
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1093
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1094
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1095
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1096
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1097
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1098
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1099
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1100
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1101
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1102
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1103
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1104
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1105
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1106
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1107
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1108
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1109
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1110
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1111
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1112
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1113
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1114
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1115
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1116
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1117
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1118
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1119
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1120
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1121
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1122
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1123
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1124
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1125
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1126
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1127
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1128
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1129
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1130
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1131
      "\xe2\xb1\xa5"     => "\xc8\xba",
1132
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1133
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1134
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1135
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1136
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1137
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1138
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1139
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1140
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1141
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1142
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1143
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1144
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1145
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1146
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1147
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1148
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1149
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1150
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1151
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1152
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1153
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1154
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1155
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1156
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1157
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1158
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1159
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1160
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1161
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1162
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1163
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1164
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1165
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1166
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1167
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1168
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1169
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1170
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1171
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1172
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1173
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1174
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1175
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1176
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1177
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1178
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1179
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1180
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1181
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1182
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1183
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1184
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1185
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1186
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1187
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1188
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1189
      "\xe1\xbe\xbe"     => "\xce\x99",
1190
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1191
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1192
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1193
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1194
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1195
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1196
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1197
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1198
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1199
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1200
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1201
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1202
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1203
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1204
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1205
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1206
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1207
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1208
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1209
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1210
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1211
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1212
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1213
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1214
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1215
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1216
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1217
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1218
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1219
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1220
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1221
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1222
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1223
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1224
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1225
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1226
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1227
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1228
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1229
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1230
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1231
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1232
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1233
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1234
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1235
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1236
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1237
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1238
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1239
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1240
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1241
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1242
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1243
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1244
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1245
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1246
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1247
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1248
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1249
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1250
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1251
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1252
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1253
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1254
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1255
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1256
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1257
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1258
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1259
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1260
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1261
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1262
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1263
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1264
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1265
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1266
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1267
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1268
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1269
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1270
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1271
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1272
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1273
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1274
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1275
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1276
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1277
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1278
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1279
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1280
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1281
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1282
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1283
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1284
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1285
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1286
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1287
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1288
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1289
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1290
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1291
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1292
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1293
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1294
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1295
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1296
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1297
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1298
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1299
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1300
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1301
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1302
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1303
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1304
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1305
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1306
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1307
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1308
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1309
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1310
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1311
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1312
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1313
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1314
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1315
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1316
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1317
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1318
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1319
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1320
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1321
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1322
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1323
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1324
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1325
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1326
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1327
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1328
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1329
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1330
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1331
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1332
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1333
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1334
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1335
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1336
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1337
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1338
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1339
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1340
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1341
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1342
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1343
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1344
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1345
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1346
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1347
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1348
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1349
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1350
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1351
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1352
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1353
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1354
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1355
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1356
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1357
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1358
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1359
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1360
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1361
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1362
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1363
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1364
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1365
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1366
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1367
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1368
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1369
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1370
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1371
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1372
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1373
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1374
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1375
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1376
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1377
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1378
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1379
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1380
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1381
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1382
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1383
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1384
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1385
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1386
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1387
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1388
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1389
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1390
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1391
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1392
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1393
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1394
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1395
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1396
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1397
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1398
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1399
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1400
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1401
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1402
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1403
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1404
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1405
      "\xd6\x86"         => "\xd5\x96",
1406
      "\xd6\x85"         => "\xd5\x95",
1407
      "\xd6\x84"         => "\xd5\x94",
1408
      "\xd6\x83"         => "\xd5\x93",
1409
      "\xd6\x82"         => "\xd5\x92",
1410
      "\xd6\x81"         => "\xd5\x91",
1411
      "\xd6\x80"         => "\xd5\x90",
1412
      "\xd5\xbf"         => "\xd5\x8f",
1413
      "\xd5\xbe"         => "\xd5\x8e",
1414
      "\xd5\xbd"         => "\xd5\x8d",
1415
      "\xd5\xbc"         => "\xd5\x8c",
1416
      "\xd5\xbb"         => "\xd5\x8b",
1417
      "\xd5\xba"         => "\xd5\x8a",
1418
      "\xd5\xb9"         => "\xd5\x89",
1419
      "\xd5\xb8"         => "\xd5\x88",
1420
      "\xd5\xb7"         => "\xd5\x87",
1421
      "\xd5\xb6"         => "\xd5\x86",
1422
      "\xd5\xb5"         => "\xd5\x85",
1423
      "\xd5\xb4"         => "\xd5\x84",
1424
      "\xd5\xb3"         => "\xd5\x83",
1425
      "\xd5\xb2"         => "\xd5\x82",
1426
      "\xd5\xb1"         => "\xd5\x81",
1427
      "\xd5\xb0"         => "\xd5\x80",
1428
      "\xd5\xaf"         => "\xd4\xbf",
1429
      "\xd5\xae"         => "\xd4\xbe",
1430
      "\xd5\xad"         => "\xd4\xbd",
1431
      "\xd5\xac"         => "\xd4\xbc",
1432
      "\xd5\xab"         => "\xd4\xbb",
1433
      "\xd5\xaa"         => "\xd4\xba",
1434
      "\xd5\xa9"         => "\xd4\xb9",
1435
      "\xd5\xa8"         => "\xd4\xb8",
1436
      "\xd5\xa7"         => "\xd4\xb7",
1437
      "\xd5\xa6"         => "\xd4\xb6",
1438
      "\xd5\xa5"         => "\xd4\xb5",
1439
      "\xd5\xa4"         => "\xd4\xb4",
1440
      "\xd5\xa3"         => "\xd4\xb3",
1441
      "\xd5\xa2"         => "\xd4\xb2",
1442
      "\xd5\xa1"         => "\xd4\xb1",
1443
      "\xd4\xa5"         => "\xd4\xa4",
1444
      "\xd4\xa3"         => "\xd4\xa2",
1445
      "\xd4\xa1"         => "\xd4\xa0",
1446
      "\xd4\x9f"         => "\xd4\x9e",
1447
      "\xd4\x9d"         => "\xd4\x9c",
1448
      "\xd4\x9b"         => "\xd4\x9a",
1449
      "\xd4\x99"         => "\xd4\x98",
1450
      "\xd4\x97"         => "\xd4\x96",
1451
      "\xd4\x95"         => "\xd4\x94",
1452
      "\xd4\x93"         => "\xd4\x92",
1453
      "\xd4\x91"         => "\xd4\x90",
1454
      "\xd4\x8f"         => "\xd4\x8e",
1455
      "\xd4\x8d"         => "\xd4\x8c",
1456
      "\xd4\x8b"         => "\xd4\x8a",
1457
      "\xd4\x89"         => "\xd4\x88",
1458
      "\xd4\x87"         => "\xd4\x86",
1459
      "\xd4\x85"         => "\xd4\x84",
1460
      "\xd4\x83"         => "\xd4\x82",
1461
      "\xd4\x81"         => "\xd4\x80",
1462
      "\xd3\xbf"         => "\xd3\xbe",
1463
      "\xd3\xbd"         => "\xd3\xbc",
1464
      "\xd3\xbb"         => "\xd3\xba",
1465
      "\xd3\xb9"         => "\xd3\xb8",
1466
      "\xd3\xb7"         => "\xd3\xb6",
1467
      "\xd3\xb5"         => "\xd3\xb4",
1468
      "\xd3\xb3"         => "\xd3\xb2",
1469
      "\xd3\xb1"         => "\xd3\xb0",
1470
      "\xd3\xaf"         => "\xd3\xae",
1471
      "\xd3\xad"         => "\xd3\xac",
1472
      "\xd3\xab"         => "\xd3\xaa",
1473
      "\xd3\xa9"         => "\xd3\xa8",
1474
      "\xd3\xa7"         => "\xd3\xa6",
1475
      "\xd3\xa5"         => "\xd3\xa4",
1476
      "\xd3\xa3"         => "\xd3\xa2",
1477
      "\xd3\xa1"         => "\xd3\xa0",
1478
      "\xd3\x9f"         => "\xd3\x9e",
1479
      "\xd3\x9d"         => "\xd3\x9c",
1480
      "\xd3\x9b"         => "\xd3\x9a",
1481
      "\xd3\x99"         => "\xd3\x98",
1482
      "\xd3\x97"         => "\xd3\x96",
1483
      "\xd3\x95"         => "\xd3\x94",
1484
      "\xd3\x93"         => "\xd3\x92",
1485
      "\xd3\x91"         => "\xd3\x90",
1486
      "\xd3\x8f"         => "\xd3\x80",
1487
      "\xd3\x8e"         => "\xd3\x8d",
1488
      "\xd3\x8c"         => "\xd3\x8b",
1489
      "\xd3\x8a"         => "\xd3\x89",
1490
      "\xd3\x88"         => "\xd3\x87",
1491
      "\xd3\x86"         => "\xd3\x85",
1492
      "\xd3\x84"         => "\xd3\x83",
1493
      "\xd3\x82"         => "\xd3\x81",
1494
      "\xd2\xbf"         => "\xd2\xbe",
1495
      "\xd2\xbd"         => "\xd2\xbc",
1496
      "\xd2\xbb"         => "\xd2\xba",
1497
      "\xd2\xb9"         => "\xd2\xb8",
1498
      "\xd2\xb7"         => "\xd2\xb6",
1499
      "\xd2\xb5"         => "\xd2\xb4",
1500
      "\xd2\xb3"         => "\xd2\xb2",
1501
      "\xd2\xb1"         => "\xd2\xb0",
1502
      "\xd2\xaf"         => "\xd2\xae",
1503
      "\xd2\xad"         => "\xd2\xac",
1504
      "\xd2\xab"         => "\xd2\xaa",
1505
      "\xd2\xa9"         => "\xd2\xa8",
1506
      "\xd2\xa7"         => "\xd2\xa6",
1507
      "\xd2\xa5"         => "\xd2\xa4",
1508
      "\xd2\xa3"         => "\xd2\xa2",
1509
      "\xd2\xa1"         => "\xd2\xa0",
1510
      "\xd2\x9f"         => "\xd2\x9e",
1511
      "\xd2\x9d"         => "\xd2\x9c",
1512
      "\xd2\x9b"         => "\xd2\x9a",
1513
      "\xd2\x99"         => "\xd2\x98",
1514
      "\xd2\x97"         => "\xd2\x96",
1515
      "\xd2\x95"         => "\xd2\x94",
1516
      "\xd2\x93"         => "\xd2\x92",
1517
      "\xd2\x91"         => "\xd2\x90",
1518
      "\xd2\x8f"         => "\xd2\x8e",
1519
      "\xd2\x8d"         => "\xd2\x8c",
1520
      "\xd2\x8b"         => "\xd2\x8a",
1521
      "\xd2\x81"         => "\xd2\x80",
1522
      "\xd1\xbf"         => "\xd1\xbe",
1523
      "\xd1\xbd"         => "\xd1\xbc",
1524
      "\xd1\xbb"         => "\xd1\xba",
1525
      "\xd1\xb9"         => "\xd1\xb8",
1526
      "\xd1\xb7"         => "\xd1\xb6",
1527
      "\xd1\xb5"         => "\xd1\xb4",
1528
      "\xd1\xb3"         => "\xd1\xb2",
1529
      "\xd1\xb1"         => "\xd1\xb0",
1530
      "\xd1\xaf"         => "\xd1\xae",
1531
      "\xd1\xad"         => "\xd1\xac",
1532
      "\xd1\xab"         => "\xd1\xaa",
1533
      "\xd1\xa9"         => "\xd1\xa8",
1534
      "\xd1\xa7"         => "\xd1\xa6",
1535
      "\xd1\xa5"         => "\xd1\xa4",
1536
      "\xd1\xa3"         => "\xd1\xa2",
1537
      "\xd1\xa1"         => "\xd1\xa0",
1538
      "\xd1\x9f"         => "\xd0\x8f",
1539
      "\xd1\x9e"         => "\xd0\x8e",
1540
      "\xd1\x9d"         => "\xd0\x8d",
1541
      "\xd1\x9c"         => "\xd0\x8c",
1542
      "\xd1\x9b"         => "\xd0\x8b",
1543
      "\xd1\x9a"         => "\xd0\x8a",
1544
      "\xd1\x99"         => "\xd0\x89",
1545
      "\xd1\x98"         => "\xd0\x88",
1546
      "\xd1\x97"         => "\xd0\x87",
1547
      "\xd1\x96"         => "\xd0\x86",
1548
      "\xd1\x95"         => "\xd0\x85",
1549
      "\xd1\x94"         => "\xd0\x84",
1550
      "\xd1\x93"         => "\xd0\x83",
1551
      "\xd1\x92"         => "\xd0\x82",
1552
      "\xd1\x91"         => "\xd0\x81",
1553
      "\xd1\x90"         => "\xd0\x80",
1554
      "\xd1\x8f"         => "\xd0\xaf",
1555
      "\xd1\x8e"         => "\xd0\xae",
1556
      "\xd1\x8d"         => "\xd0\xad",
1557
      "\xd1\x8c"         => "\xd0\xac",
1558
      "\xd1\x8b"         => "\xd0\xab",
1559
      "\xd1\x8a"         => "\xd0\xaa",
1560
      "\xd1\x89"         => "\xd0\xa9",
1561
      "\xd1\x88"         => "\xd0\xa8",
1562
      "\xd1\x87"         => "\xd0\xa7",
1563
      "\xd1\x86"         => "\xd0\xa6",
1564
      "\xd1\x85"         => "\xd0\xa5",
1565
      "\xd1\x84"         => "\xd0\xa4",
1566
      "\xd1\x83"         => "\xd0\xa3",
1567
      "\xd1\x82"         => "\xd0\xa2",
1568
      "\xd1\x81"         => "\xd0\xa1",
1569
      "\xd1\x80"         => "\xd0\xa0",
1570
      "\xd0\xbf"         => "\xd0\x9f",
1571
      "\xd0\xbe"         => "\xd0\x9e",
1572
      "\xd0\xbd"         => "\xd0\x9d",
1573
      "\xd0\xbc"         => "\xd0\x9c",
1574
      "\xd0\xbb"         => "\xd0\x9b",
1575
      "\xd0\xba"         => "\xd0\x9a",
1576
      "\xd0\xb9"         => "\xd0\x99",
1577
      "\xd0\xb8"         => "\xd0\x98",
1578
      "\xd0\xb7"         => "\xd0\x97",
1579
      "\xd0\xb6"         => "\xd0\x96",
1580
      "\xd0\xb5"         => "\xd0\x95",
1581
      "\xd0\xb4"         => "\xd0\x94",
1582
      "\xd0\xb3"         => "\xd0\x93",
1583
      "\xd0\xb2"         => "\xd0\x92",
1584
      "\xd0\xb1"         => "\xd0\x91",
1585
      "\xd0\xb0"         => "\xd0\x90",
1586
      "\xcf\xbb"         => "\xcf\xba",
1587
      "\xcf\xb8"         => "\xcf\xb7",
1588
      "\xcf\xb5"         => "\xce\x95",
1589
      "\xcf\xb2"         => "\xcf\xb9",
1590
      "\xcf\xb1"         => "\xce\xa1",
1591
      "\xcf\xb0"         => "\xce\x9a",
1592
      "\xcf\xaf"         => "\xcf\xae",
1593
      "\xcf\xad"         => "\xcf\xac",
1594
      "\xcf\xab"         => "\xcf\xaa",
1595
      "\xcf\xa9"         => "\xcf\xa8",
1596
      "\xcf\xa7"         => "\xcf\xa6",
1597
      "\xcf\xa5"         => "\xcf\xa4",
1598
      "\xcf\xa3"         => "\xcf\xa2",
1599
      "\xcf\xa1"         => "\xcf\xa0",
1600
      "\xcf\x9f"         => "\xcf\x9e",
1601
      "\xcf\x9d"         => "\xcf\x9c",
1602
      "\xcf\x9b"         => "\xcf\x9a",
1603
      "\xcf\x99"         => "\xcf\x98",
1604
      "\xcf\x97"         => "\xcf\x8f",
1605
      "\xcf\x96"         => "\xce\xa0",
1606
      "\xcf\x95"         => "\xce\xa6",
1607
      "\xcf\x91"         => "\xce\x98",
1608
      "\xcf\x90"         => "\xce\x92",
1609
      "\xcf\x8e"         => "\xce\x8f",
1610
      "\xcf\x8d"         => "\xce\x8e",
1611
      "\xcf\x8c"         => "\xce\x8c",
1612
      "\xcf\x8b"         => "\xce\xab",
1613
      "\xcf\x8a"         => "\xce\xaa",
1614
      "\xcf\x89"         => "\xce\xa9",
1615
      "\xcf\x88"         => "\xce\xa8",
1616
      "\xcf\x87"         => "\xce\xa7",
1617
      "\xcf\x86"         => "\xce\xa6",
1618
      "\xcf\x85"         => "\xce\xa5",
1619
      "\xcf\x84"         => "\xce\xa4",
1620
      "\xcf\x83"         => "\xce\xa3",
1621
      "\xcf\x82"         => "\xce\xa3",
1622
      "\xcf\x81"         => "\xce\xa1",
1623
      "\xcf\x80"         => "\xce\xa0",
1624
      "\xce\xbf"         => "\xce\x9f",
1625
      "\xce\xbe"         => "\xce\x9e",
1626
      "\xce\xbd"         => "\xce\x9d",
1627
      "\xce\xbc"         => "\xce\x9c",
1628
      "\xce\xbb"         => "\xce\x9b",
1629
      "\xce\xba"         => "\xce\x9a",
1630
      "\xce\xb9"         => "\xce\x99",
1631
      "\xce\xb8"         => "\xce\x98",
1632
      "\xce\xb7"         => "\xce\x97",
1633
      "\xce\xb6"         => "\xce\x96",
1634
      "\xce\xb5"         => "\xce\x95",
1635
      "\xce\xb4"         => "\xce\x94",
1636
      "\xce\xb3"         => "\xce\x93",
1637
      "\xce\xb2"         => "\xce\x92",
1638
      "\xce\xb1"         => "\xce\x91",
1639
      "\xce\xaf"         => "\xce\x8a",
1640
      "\xce\xae"         => "\xce\x89",
1641
      "\xce\xad"         => "\xce\x88",
1642
      "\xce\xac"         => "\xce\x86",
1643
      "\xcd\xbd"         => "\xcf\xbf",
1644
      "\xcd\xbc"         => "\xcf\xbe",
1645
      "\xcd\xbb"         => "\xcf\xbd",
1646
      "\xcd\xb7"         => "\xcd\xb6",
1647
      "\xcd\xb3"         => "\xcd\xb2",
1648
      "\xcd\xb1"         => "\xcd\xb0",
1649
      "\xca\x92"         => "\xc6\xb7",
1650
      "\xca\x8c"         => "\xc9\x85",
1651
      "\xca\x8b"         => "\xc6\xb2",
1652
      "\xca\x8a"         => "\xc6\xb1",
1653
      "\xca\x89"         => "\xc9\x84",
1654
      "\xca\x88"         => "\xc6\xae",
1655
      "\xca\x83"         => "\xc6\xa9",
1656
      "\xca\x80"         => "\xc6\xa6",
1657
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1658
      "\xc9\xb5"         => "\xc6\x9f",
1659
      "\xc9\xb2"         => "\xc6\x9d",
1660
      "\xc9\xb1"         => "\xe2\xb1\xae",
1661
      "\xc9\xaf"         => "\xc6\x9c",
1662
      "\xc9\xab"         => "\xe2\xb1\xa2",
1663
      "\xc9\xa9"         => "\xc6\x96",
1664
      "\xc9\xa8"         => "\xc6\x97",
1665
      "\xc9\xa5"         => "\xea\x9e\x8d",
1666
      "\xc9\xa3"         => "\xc6\x94",
1667
      "\xc9\xa0"         => "\xc6\x93",
1668
      "\xc9\x9b"         => "\xc6\x90",
1669
      "\xc9\x99"         => "\xc6\x8f",
1670
      "\xc9\x97"         => "\xc6\x8a",
1671
      "\xc9\x96"         => "\xc6\x89",
1672
      "\xc9\x94"         => "\xc6\x86",
1673
      "\xc9\x93"         => "\xc6\x81",
1674
      "\xc9\x92"         => "\xe2\xb1\xb0",
1675
      "\xc9\x91"         => "\xe2\xb1\xad",
1676
      "\xc9\x90"         => "\xe2\xb1\xaf",
1677
      "\xc9\x8f"         => "\xc9\x8e",
1678
      "\xc9\x8d"         => "\xc9\x8c",
1679
      "\xc9\x8b"         => "\xc9\x8a",
1680
      "\xc9\x89"         => "\xc9\x88",
1681
      "\xc9\x87"         => "\xc9\x86",
1682
      "\xc9\x82"         => "\xc9\x81",
1683
      "\xc9\x80"         => "\xe2\xb1\xbf",
1684
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1685
      "\xc8\xbc"         => "\xc8\xbb",
1686
      "\xc8\xb3"         => "\xc8\xb2",
1687
      "\xc8\xb1"         => "\xc8\xb0",
1688
      "\xc8\xaf"         => "\xc8\xae",
1689
      "\xc8\xad"         => "\xc8\xac",
1690
      "\xc8\xab"         => "\xc8\xaa",
1691
      "\xc8\xa9"         => "\xc8\xa8",
1692
      "\xc8\xa7"         => "\xc8\xa6",
1693
      "\xc8\xa5"         => "\xc8\xa4",
1694
      "\xc8\xa3"         => "\xc8\xa2",
1695
      "\xc8\x9f"         => "\xc8\x9e",
1696
      "\xc8\x9d"         => "\xc8\x9c",
1697
      "\xc8\x9b"         => "\xc8\x9a",
1698
      "\xc8\x99"         => "\xc8\x98",
1699
      "\xc8\x97"         => "\xc8\x96",
1700
      "\xc8\x95"         => "\xc8\x94",
1701
      "\xc8\x93"         => "\xc8\x92",
1702
      "\xc8\x91"         => "\xc8\x90",
1703
      "\xc8\x8f"         => "\xc8\x8e",
1704
      "\xc8\x8d"         => "\xc8\x8c",
1705
      "\xc8\x8b"         => "\xc8\x8a",
1706
      "\xc8\x89"         => "\xc8\x88",
1707
      "\xc8\x87"         => "\xc8\x86",
1708
      "\xc8\x85"         => "\xc8\x84",
1709
      "\xc8\x83"         => "\xc8\x82",
1710
      "\xc8\x81"         => "\xc8\x80",
1711
      "\xc7\xbf"         => "\xc7\xbe",
1712
      "\xc7\xbd"         => "\xc7\xbc",
1713
      "\xc7\xbb"         => "\xc7\xba",
1714
      "\xc7\xb9"         => "\xc7\xb8",
1715
      "\xc7\xb5"         => "\xc7\xb4",
1716
      "\xc7\xb3"         => "\xc7\xb2",
1717
      "\xc7\xaf"         => "\xc7\xae",
1718
      "\xc7\xad"         => "\xc7\xac",
1719
      "\xc7\xab"         => "\xc7\xaa",
1720
      "\xc7\xa9"         => "\xc7\xa8",
1721
      "\xc7\xa7"         => "\xc7\xa6",
1722
      "\xc7\xa5"         => "\xc7\xa4",
1723
      "\xc7\xa3"         => "\xc7\xa2",
1724
      "\xc7\xa1"         => "\xc7\xa0",
1725
      "\xc7\x9f"         => "\xc7\x9e",
1726
      "\xc7\x9d"         => "\xc6\x8e",
1727
      "\xc7\x9c"         => "\xc7\x9b",
1728
      "\xc7\x9a"         => "\xc7\x99",
1729
      "\xc7\x98"         => "\xc7\x97",
1730
      "\xc7\x96"         => "\xc7\x95",
1731
      "\xc7\x94"         => "\xc7\x93",
1732
      "\xc7\x92"         => "\xc7\x91",
1733
      "\xc7\x90"         => "\xc7\x8f",
1734
      "\xc7\x8e"         => "\xc7\x8d",
1735
      "\xc7\x8c"         => "\xc7\x8b",
1736
      "\xc7\x89"         => "\xc7\x88",
1737
      "\xc7\x86"         => "\xc7\x85",
1738
      "\xc6\xbf"         => "\xc7\xb7",
1739
      "\xc6\xbd"         => "\xc6\xbc",
1740
      "\xc6\xb9"         => "\xc6\xb8",
1741
      "\xc6\xb6"         => "\xc6\xb5",
1742
      "\xc6\xb4"         => "\xc6\xb3",
1743
      "\xc6\xb0"         => "\xc6\xaf",
1744
      "\xc6\xad"         => "\xc6\xac",
1745
      "\xc6\xa8"         => "\xc6\xa7",
1746
      "\xc6\xa5"         => "\xc6\xa4",
1747
      "\xc6\xa3"         => "\xc6\xa2",
1748
      "\xc6\xa1"         => "\xc6\xa0",
1749
      "\xc6\x9e"         => "\xc8\xa0",
1750
      "\xc6\x9a"         => "\xc8\xbd",
1751
      "\xc6\x99"         => "\xc6\x98",
1752
      "\xc6\x95"         => "\xc7\xb6",
1753
      "\xc6\x92"         => "\xc6\x91",
1754
      "\xc6\x8c"         => "\xc6\x8b",
1755
      "\xc6\x88"         => "\xc6\x87",
1756
      "\xc6\x85"         => "\xc6\x84",
1757
      "\xc6\x83"         => "\xc6\x82",
1758
      "\xc6\x80"         => "\xc9\x83",
1759
      "\xc5\xbf"         => "\x53",
1760
      "\xc5\xbe"         => "\xc5\xbd",
1761
      "\xc5\xbc"         => "\xc5\xbb",
1762
      "\xc5\xba"         => "\xc5\xb9",
1763
      "\xc5\xb7"         => "\xc5\xb6",
1764
      "\xc5\xb5"         => "\xc5\xb4",
1765
      "\xc5\xb3"         => "\xc5\xb2",
1766
      "\xc5\xb1"         => "\xc5\xb0",
1767
      "\xc5\xaf"         => "\xc5\xae",
1768
      "\xc5\xad"         => "\xc5\xac",
1769
      "\xc5\xab"         => "\xc5\xaa",
1770
      "\xc5\xa9"         => "\xc5\xa8",
1771
      "\xc5\xa7"         => "\xc5\xa6",
1772
      "\xc5\xa5"         => "\xc5\xa4",
1773
      "\xc5\xa3"         => "\xc5\xa2",
1774
      "\xc5\xa1"         => "\xc5\xa0",
1775
      "\xc5\x9f"         => "\xc5\x9e",
1776
      "\xc5\x9d"         => "\xc5\x9c",
1777
      "\xc5\x9b"         => "\xc5\x9a",
1778
      "\xc5\x99"         => "\xc5\x98",
1779
      "\xc5\x97"         => "\xc5\x96",
1780
      "\xc5\x95"         => "\xc5\x94",
1781
      "\xc5\x93"         => "\xc5\x92",
1782
      "\xc5\x91"         => "\xc5\x90",
1783
      "\xc5\x8f"         => "\xc5\x8e",
1784
      "\xc5\x8d"         => "\xc5\x8c",
1785
      "\xc5\x8b"         => "\xc5\x8a",
1786
      "\xc5\x88"         => "\xc5\x87",
1787
      "\xc5\x86"         => "\xc5\x85",
1788
      "\xc5\x84"         => "\xc5\x83",
1789
      "\xc5\x82"         => "\xc5\x81",
1790
      "\xc5\x80"         => "\xc4\xbf",
1791
      "\xc4\xbe"         => "\xc4\xbd",
1792
      "\xc4\xbc"         => "\xc4\xbb",
1793
      "\xc4\xba"         => "\xc4\xb9",
1794
      "\xc4\xb7"         => "\xc4\xb6",
1795
      "\xc4\xb5"         => "\xc4\xb4",
1796
      "\xc4\xb3"         => "\xc4\xb2",
1797
      "\xc4\xb1"         => "\x49",
1798
      "\xc4\xaf"         => "\xc4\xae",
1799
      "\xc4\xad"         => "\xc4\xac",
1800
      "\xc4\xab"         => "\xc4\xaa",
1801
      "\xc4\xa9"         => "\xc4\xa8",
1802
      "\xc4\xa7"         => "\xc4\xa6",
1803
      "\xc4\xa5"         => "\xc4\xa4",
1804
      "\xc4\xa3"         => "\xc4\xa2",
1805
      "\xc4\xa1"         => "\xc4\xa0",
1806
      "\xc4\x9f"         => "\xc4\x9e",
1807
      "\xc4\x9d"         => "\xc4\x9c",
1808
      "\xc4\x9b"         => "\xc4\x9a",
1809
      "\xc4\x99"         => "\xc4\x98",
1810
      "\xc4\x97"         => "\xc4\x96",
1811
      "\xc4\x95"         => "\xc4\x94",
1812
      "\xc4\x93"         => "\xc4\x92",
1813
      "\xc4\x91"         => "\xc4\x90",
1814
      "\xc4\x8f"         => "\xc4\x8e",
1815
      "\xc4\x8d"         => "\xc4\x8c",
1816
      "\xc4\x8b"         => "\xc4\x8a",
1817
      "\xc4\x89"         => "\xc4\x88",
1818
      "\xc4\x87"         => "\xc4\x86",
1819
      "\xc4\x85"         => "\xc4\x84",
1820
      "\xc4\x83"         => "\xc4\x82",
1821
      "\xc4\x81"         => "\xc4\x80",
1822
      "\xc3\xbf"         => "\xc5\xb8",
1823
      "\xc3\xbe"         => "\xc3\x9e",
1824
      "\xc3\xbd"         => "\xc3\x9d",
1825
      "\xc3\xbc"         => "\xc3\x9c",
1826
      "\xc3\xbb"         => "\xc3\x9b",
1827
      "\xc3\xba"         => "\xc3\x9a",
1828
      "\xc3\xb9"         => "\xc3\x99",
1829
      "\xc3\xb8"         => "\xc3\x98",
1830
      "\xc3\xb6"         => "\xc3\x96",
1831
      "\xc3\xb5"         => "\xc3\x95",
1832
      "\xc3\xb4"         => "\xc3\x94",
1833
      "\xc3\xb3"         => "\xc3\x93",
1834
      "\xc3\xb2"         => "\xc3\x92",
1835
      "\xc3\xb1"         => "\xc3\x91",
1836
      "\xc3\xb0"         => "\xc3\x90",
1837
      "\xc3\xaf"         => "\xc3\x8f",
1838
      "\xc3\xae"         => "\xc3\x8e",
1839
      "\xc3\xad"         => "\xc3\x8d",
1840
      "\xc3\xac"         => "\xc3\x8c",
1841
      "\xc3\xab"         => "\xc3\x8b",
1842
      "\xc3\xaa"         => "\xc3\x8a",
1843
      "\xc3\xa9"         => "\xc3\x89",
1844
      "\xc3\xa8"         => "\xc3\x88",
1845
      "\xc3\xa7"         => "\xc3\x87",
1846
      "\xc3\xa6"         => "\xc3\x86",
1847
      "\xc3\xa5"         => "\xc3\x85",
1848
      "\xc3\xa4"         => "\xc3\x84",
1849
      "\xc3\xa3"         => "\xc3\x83",
1850
      "\xc3\xa2"         => "\xc3\x82",
1851
      "\xc3\xa1"         => "\xc3\x81",
1852
      "\xc3\xa0"         => "\xc3\x80",
1853
      "\xc2\xb5"         => "\xce\x9c",
1854
      "\x7a"             => "\x5a",
1855
      "\x79"             => "\x59",
1856
      "\x78"             => "\x58",
1857
      "\x77"             => "\x57",
1858
      "\x76"             => "\x56",
1859
      "\x75"             => "\x55",
1860
      "\x74"             => "\x54",
1861 157
      "\x73"             => "\x53",
1862
      "\x72"             => "\x52",
1863 157
      "\x71"             => "\x51",
1864
      "\x70"             => "\x50",
1865 1
      "\x6f"             => "\x4f",
1866 1
      "\x6e"             => "\x4e",
1867 1
      "\x6d"             => "\x4d",
1868 1
      "\x6c"             => "\x4c",
1869 1
      "\x6b"             => "\x4b",
1870 157
      "\x6a"             => "\x4a",
1871
      "\x69"             => "\x49",
1872
      "\x68"             => "\x48",
1873
      "\x67"             => "\x47",
1874
      "\x66"             => "\x46",
1875
      "\x65"             => "\x45",
1876
      "\x64"             => "\x44",
1877
      "\x63"             => "\x43",
1878
      "\x62"             => "\x42",
1879 8
      "\x61"             => "\x41",
1880
1881 8
    );
1882
1883 8
    return $case;
1884
  }
1885
1886
  /**
   * Probes the server environment for UTF-8 related capabilities.
   *
   * Each probe result is stored in the static $support map under the keys
   * "mbstring", "iconv", "intl", "intlChar" and "pcre_utf8". The probes are
   * skipped when the "mbstring" entry is already set.
   *
   * INFO: There is no need to call this manually, the methods that rely on
   *       the detection trigger it themselves.
   */
  public static function checkForSupport()
  {
    // the "mbstring" entry doubles as the "already detected" marker
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1902 1
1903
  /**
   * Builds the UTF-8 encoded character that belongs to a code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * When "intlChar" support was detected, the argument is handed to
   * \IntlChar::chr() unchanged. Otherwise an argument that is not already
   * an integer is converted via UTF8::hex_to_int(), and the character is
   * produced by decoding the matching numeric HTML entity.
   *
   * @param    int $code_point The code point for which to generate a character.
   *
   * @return   string|null Multi-Byte character, returns null on failure to encode.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $number = (int)$code_point;

    if (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // anything that is not already an integer goes through the hex conversion
    if ($number !== $code_point) {
      $number = self::hex_to_int($code_point);
    }

    // NOTE: this is a loose check on purpose; besides a failed conversion
    //       it also rejects the code point 0 in this fallback path
    if ($number) {
      return self::html_entity_decode("&#{$number};", ENT_QUOTES);
    }

    return null;
  }
1932
1933
  /**
   * Runs a callback on every character of a string.
   *
   * The string is cut into characters with UTF8::split() and each piece is
   * passed to the callback via array_map().
   *
   * @param  string|array $callback The callback function.
   * @param  string       $str      UTF-8 string to run callback on.
   *
   * @return array The values returned by the callback, one per character.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1947
1948 2
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character,
   *           an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Test for "no bytes at all" instead of truthiness: the string "0" is
    // falsy in PHP but still consists of one character.
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1968
1969
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes are read: its marker bits are
   * removed and the payload (low bits) of every following byte is appended.
   * No validation takes place; a first byte that matches none of the
   * multi-byte patterns is returned as it is.
   *
   * @param   string $char The input character
   *
   * @return  int
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;
    $value = self::ord($char[0]);

    // 0xxxxxxx => single byte, the byte value is the result
    if (!($value & 0x80)) {
      return $value;
    }

    // mask, marker bits of the first byte, sequence length
    $prefixes = array(
        array(0xe0, 0xc0, 2), // 110xxxxx
        array(0xf0, 0xe0, 3), // 1110xxxx
        array(0xf8, 0xf0, 4), // 11110xxx
    );

    $length = 1;
    foreach ($prefixes as $prefix) {
      if (($value & $prefix[0]) === $prefix[1]) {
        $length = $prefix[2];
        // strip the marker bits, keep the payload
        $value &= ~$prefix[1];
        break;
      }
    }

    // 10xxxxxx => append the payload of each following byte
    for ($offset = 1; $offset < $length; ++$offset) {
      $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $value;
  }
2008
2009 35
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The code point is looked up with UTF8::ord() and formatted by
   * UTF8::int_to_hex().
   *
   * @param    string $char The input character
   * @param    string $pfix The prefix that is handed to UTF8::int_to_hex().
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
2021
2022
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from UTF8::split(); the line ending is placed between
   * the chunks only, nothing is appended after the last chunk.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
2035 1
2036 1
  /**
   * Removes every byte that is not part of a UTF-8 sequence and, on request, tidies the result.
   *
   * Always applied: the byte filter, the removal of the replacement character
   * via UTF8::replace_diamond_question_mark() and UTF8::remove_invisible_characters().
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to run UTF8::removeBOM() on the result
   * @param bool   $normalize_whitespace    set true, to run UTF8::normalize_whitespace() on the result
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings
    // (presumably the reason for the bounded {1,100} repetition below)

    // group 1 holds well-formed sequences; stray bytes land in group 2 / 3
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // keeping only group 1 drops everything matched by the other two groups
    $result = preg_replace($pattern, '$1', $str);

    $result = self::replace_diamond_question_mark($result, '');
    $result = self::remove_invisible_characters($result);

    // the optional steps only run for a strict boolean true
    if (true === $normalize_whitespace) {
      $result = self::normalize_whitespace($result, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $result = self::normalize_msword($result);
    }

    if (true === $remove_bom) {
      $result = self::removeBOM($result);
    }

    return $result;
  }
2082 3
2083
  /**
   * Cleans up a string so that only printable UTF-8 chars remain and fixes the UTF-8 encoding.
   *
   * @param string $str
   *
   * @return string The cleaned string, an empty string for empty input.
   */
  public static function cleanup($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($input);

    // clean() is asked to:
    // - remove all non UTF-8 symbols
    // - remove the diamond question mark (�)
    // - remove invisible characters (e.g. "\0")
    // - remove the BOM
    // - normalize whitespace chars (but keep non-breaking-spaces)
    return (string)self::clean($fixed, true, true, false, true);
  }
2110
2111
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * A string is first cut into characters with UTF8::split(); an array is
   * used as given. Every element is then mapped through UTF8::ord().
   *
   * @param    string|string[] $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool            $u_style If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $toCodePoint = array(
        '\\voku\\helper\\UTF8',
        'ord',
    );
    $points = array_map($toCodePoint, $chars);

    if (!$u_style) {
      return $points;
    }

    // second pass: format every integer with UTF8::int_to_hex()
    $toHex = array(
        '\\voku\\helper\\UTF8',
        'int_to_hex',
    );

    return array_map($toHex, $points);
  }
2146
2147
  /**
   * Counts how often each character occurs in a string.
   *
   * @param    string $str       The input string.
   * @param    bool   $cleanUtf8 Clean non UTF-8 chars from the string
   *                             (handed on to UTF8::split()).
   *
   * @return   array An associative array of Character as keys and
   *           their count as values.
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // one array element per character
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
2160 11
2161
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity, which is then
   * converted from "HTML-ENTITIES" to UTF-8 by mb_convert_encoding().
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2178
2179
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  Unlike "UTF8::utf8_encode()" this method also tries to fix broken / double encoding,
   *        so you can call it on a string that already is UTF-8 without messing up the string.
   *
   * The input is returned unchanged when it or the encoding name is empty,
   * when the current encoding could not be detected, or when the generic
   * conversion yields no usable result.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    true: always convert (for the targets 'UTF-8' and 'ISO-8859-1' regardless
   *                         of the detected encoding, which is how broken / double encoding gets fixed);
   *                         otherwise the string is only converted when the detected encoding
   *                         differs from the requested one
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // Detection failed (false or an otherwise empty result): there is no
    // usable source encoding to convert from.
    if (!$encodingDetected) {
      return $str;
    }

    // Without $force, a string that already has the target encoding is left alone.
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    self::checkForSupport();

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || $encodingDetected === 'UTF-8'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || $encodingDetected === 'ISO-8859-1'
            || $encodingDetected === 'UTF-8'
        )
    ) {
      return self::to_win1252($str);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // Accept every non-empty string. A plain truthiness test would throw away
    // a valid result of "0" and hand back the unconverted input instead.
    if (is_string($strEncoded) && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
2253
2254
  /**
2255 2
   * Reads entire file into a string.
2256 2
   *
2257
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
2258 2
   *
2259 2
   * @link http://php.net/manual/en/function.file-get-contents.php
2260
   *
2261
   * @param string        $filename      <p>
2262
   *                                     Name of the file to read.
2263 2
   *                                     </p>
2264 2
   * @param int|null      $flags         [optional] <p>
2265
   *                                     Prior to PHP 6, this parameter is called
2266 2
   *                                     use_include_path and is a bool.
2267 2
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
2268
   *                                     to trigger include path
2269 2
   *                                     search.
2270 1
   *                                     </p>
2271 1
   *                                     <p>
2272 2
   *                                     The value of flags can be any combination of
2273
   *                                     the following flags (with some restrictions), joined with the
2274
   *                                     binary OR (|)
2275
   *                                     operator.
2276 2
   *                                     </p>
2277
   *                                     <p>
2278
   *                                     <table>
2279
   *                                     Available flags
2280 2
   *                                     <tr valign="top">
2281 2
   *                                     <td>Flag</td>
2282
   *                                     <td>Description</td>
2283 2
   *                                     </tr>
2284
   *                                     <tr valign="top">
2285 2
   *                                     <td>
2286 1
   *                                     FILE_USE_INCLUDE_PATH
2287 1
   *                                     </td>
2288 1
   *                                     <td>
2289 1
   *                                     Search for filename in the include directory.
2290 1
   *                                     See include_path for more
2291 1
   *                                     information.
2292
   *                                     </td>
2293 2
   *                                     </tr>
2294 2
   *                                     <tr valign="top">
2295 2
   *                                     <td>
2296 2
   *                                     FILE_TEXT
2297
   *                                     </td>
2298
   *                                     <td>
2299 2
   *                                     As of PHP 6, the default encoding of the read
2300
   *                                     data is UTF-8. You can specify a different encoding by creating a
2301
   *                                     custom context or by changing the default using
2302
   *                                     stream_default_encoding. This flag cannot be
2303
   *                                     used with FILE_BINARY.
2304
   *                                     </td>
2305
   *                                     </tr>
2306
   *                                     <tr valign="top">
2307
   *                                     <td>
2308
   *                                     FILE_BINARY
2309 1
   *                                     </td>
2310
   *                                     <td>
2311 1
   *                                     With this flag, the file is read in binary mode. This is the default
2312
   *                                     setting and cannot be used with FILE_TEXT.
2313
   *                                     </td>
2314
   *                                     </tr>
2315
   *                                     </table>
2316
   *                                     </p>
2317
   * @param resource|null $context       [optional] <p>
2318
   *                                     A valid context resource created with
2319
   *                                     stream_context_create. If you don't need to use a
2320
   *                                     custom context, you can skip this parameter by &null;.
2321
   *                                     </p>
2322
   * @param int|null      $offset        [optional] <p>
2323 7
   *                                     The offset where the reading starts.
2324
   *                                     </p>
2325 7
   * @param int|null      $maxlen        [optional] <p>
2326 7
   *                                     Maximum length of data read. The default is to read until end
2327 2
   *                                     of file is reached.
2328
   *                                     </p>
2329 1
   * @param int           $timeout
2330 2
   *
2331 2
   * @param boolean       $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used
2332 7
   *                                     non default utf-8 chars
2333 1
   *
2334 1
   * @return string|false The read data (converted to UTF-8 and cleaned when $convertToUtf8 is true), or false on failure.
2335 1
   */
2336 1
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // no context was passed in: create one that only carries the http timeout
    if ($timeout && $context === null) {
      $httpOptions = array(
          'timeout' => $timeout,
      );
      $context = stream_context_create(
          array(
              'http' => $httpOptions,
          )
      );
    }

    // the length argument is only forwarded when it really is an integer
    $data = is_int($maxlen)
        ? file_get_contents($filename, $flags, $context, $offset, $maxlen)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    // raw data was requested (e.g. for binary files)
    if ($convertToUtf8 !== true) {
      return $data;
    }

    self::checkForSupport();

    // convert to UTF-8 (without forcing it), then clean the utf-8 string
    $converted = self::encode('UTF-8', $data, false);

    return self::cleanup($converted);
  }
2374
2375
  /**
   * Checks if the content of a file starts with a BOM (Byte Order Mark).
   *
   * The file is read completely and the content is handed to "UTF8::string_has_bom()".
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise.
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    return self::string_has_bom($content);
  }
2386
2387
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized and every other type is returned unchanged.
   *
   * @param mixed  $var                The value to filter.
   * @param int    $normalization_form Normalization form passed to "\Normalizer" (default: 4 = NFC).
   * @param string $leading_combining  Prefix that is prepended if the string starts with a combining mark.
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // strings without any high byte (0x80-0xFF) need no normalization
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker: the string was already in the requested form
      $normalized = '-';
    } else {
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // no usable result from the normalizer: fall back to our own UTF-8 encoding
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($normalized[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
2439 1
2440 1
  /**
   * "filter_input()"-wrapper which passes the result through "UTF8::filter()".
   *
   * INFO: "$option" is only forwarded to "filter_input()" if it was really passed by the caller.
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param mixed  $option
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $value = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($value);
  }
2460
2461
  /**
   * "filter_input_array()"-wrapper which passes the result through "UTF8::filter()".
   *
   * INFO: "$definition" and "$add_empty" are only forwarded if at least two arguments were passed.
   *
   * @param int   $type
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    if (func_num_args() >= 2) {
      return self::filter(filter_input_array($type, $definition, $add_empty));
    }

    return self::filter(filter_input_array($type));
  }
2480 8
2481 1
  /**
   * "filter_var()"-wrapper which passes the result through "UTF8::filter()".
   *
   * INFO: "$option" is only forwarded to "filter_var()" if it was really passed by the caller.
   *
   * @param mixed $var
   * @param int   $filter
   * @param mixed $option
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    $filtered = func_num_args() < 3
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($filtered);
  }
2500
2501
  /**
   * "filter_var_array()"-wrapper which passes the result through "UTF8::filter()".
   *
   * INFO: "$definition" and "$add_empty" are only forwarded if at least two arguments were passed.
   *
   * @param array $data
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    if (func_num_args() >= 2) {
      return self::filter(filter_var_array($data, $definition, $add_empty));
    }

    return self::filter(filter_var_array($data));
  }
2520
2521
  /**
   * Check that a string is not longer than the given number of characters.
   *
   * The length is measured via "UTF8::strlen()".
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The size in number of chars to be checked against string.
   *
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2533 1
2534 1
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * Every key of "self::$brokenUtf8ToUtf8" found in the string is replaced by its mapped value.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * @param string $str
   *
   * @return string
   */
  public static function fix_simple_utf8($str)
  {
    // search / replace lists, built once per request from the mapping table
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2561
2562
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * The string is run through "UTF8::utf8_decode()" + "UTF8::to_utf8()" again and again,
   * until a pass does not change it anymore. Arrays are processed recursively, keys are kept.
   *
   * @param string|string[] $str You can use a string or an array of strings.
   *
   * @return mixed
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      $fixed = array();
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    $previous = '';
    while (true) {
      // stop as soon as the last pass did not change anything
      if ($previous === $str) {
        break;
      }

      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($str));
    }

    return $str;
  }
2590
2591
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" is supported its direction code is used first; for codes it does not classify
   * (and without "IntlChar") the decimal code point is looked up in the static RTL tables below.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction codes from "IntlChar"-Class
      if (in_array($direction, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($direction, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // everything outside of 0x5be - 0x10b7f is never treated as RTL
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      // single code points, compared by identity
      $singles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );

      // inclusive ranges: array(first, last)
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      // single code points, compared by identity
      $singles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c,
          0x1093f, 0x10a00,
      );

      // inclusive ranges: array(first, last)
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between 0x85e and 0xfb1d only 0x200f is RTL
      return 'LTR';

    }

    if (in_array($c, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2709
2710 1
  /**
   * get data from "/data/*.php"
   *
   * @param string $file Name of the data-file, without the directory and without the ".php" extension.
   *
   * @return bool|string|array|int The value returned by the required file, false if the file does not exist.
   */
  protected static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2727
2728
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted input: 4 to 6 hexadecimal digits (case-insensitive), optionally prefixed
   * by "\u" or "U+", e.g. "00a7", "\u00a7" or "U+00a7".
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int|false The code point, or false on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits are allowed: letters beyond "f" would otherwise
    // be silently converted to 0 by intval() instead of being reported as failure.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2745
2746
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2761
2762
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * If "mb_encode_numericentity()" is available it is used for the whole Unicode range
   * (up to U+10FFFF, so characters outside of the BMP are encoded as well), otherwise
   * every character is encoded via "UTF8::single_chr_html_encode()".
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param  string $str            The Unicode string to be encoded as numbered entities.
   * @param  bool   $keepAsciiChars Keep ASCII chars.
   * @param  string $encoding
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded: skip the ASCII range on request
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      $encoding = self::normalizeEncoding($encoding);

      // convmap: array(start, end, offset, mask)
      // "end" is the last Unicode code point and the mask covers all of its 21 bits,
      // a limit of 0xffff would leave 4-byte characters (e.g. emoji) unencoded
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff),
          $encoding
      );
    }

    // fallback without "mbstring": encode char by char
    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
2801
2802 15
  /**
2803 3
   * UTF-8 version of html_entity_decode()
2804 3
   *
2805 3
   * The reason we are not using html_entity_decode() by itself is because
2806
   * while it is not technically correct to leave out the semicolon
2807
   * at the end of an entity most browsers will still interpret the entity
2808 3
   * correctly. html_entity_decode() does not convert entities without
2809
   * semicolons, so we are left with our own little solution here. Bummer.
2810
   *
2811 15
   * Convert all HTML entities to their applicable characters
2812
   *
2813 15
   * INFO: opposite to UTF8::html_encode()
2814
   *
2815
   * @link http://php.net/manual/en/function.html-entity-decode.php
2816 15
   *
2817 15
   * @param string $str      <p>
2818 15
   *                         The input string.
2819
   *                         </p>
2820 15
   * @param int    $flags    [optional] <p>
2821
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2822 15
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2823
   *                         <table>
2824 15
   *                         Available <i>flags</i> constants
2825
   *                         <tr valign="top">
2826
   *                         <td>Constant Name</td>
2827
   *                         <td>Description</td>
2828
   *                         </tr>
2829
   *                         <tr valign="top">
2830
   *                         <td><b>ENT_COMPAT</b></td>
2831
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2832
   *                         </tr>
2833
   *                         <tr valign="top">
2834 12
   *                         <td><b>ENT_QUOTES</b></td>
2835
   *                         <td>Will convert both double and single quotes.</td>
2836 12
   *                         </tr>
2837
   *                         <tr valign="top">
2838 12
   *                         <td><b>ENT_NOQUOTES</b></td>
2839
   *                         <td>Will leave both double and single quotes unconverted.</td>
2840 12
   *                         </tr>
2841 5
   *                         <tr valign="top">
2842
   *                         <td><b>ENT_HTML401</b></td>
2843
   *                         <td>
2844 11
   *                         Handle code as HTML 4.01.
2845
   *                         </td>
2846
   *                         </tr>
2847
   *                         <tr valign="top">
2848
   *                         <td><b>ENT_XML1</b></td>
2849
   *                         <td>
2850
   *                         Handle code as XML 1.
2851
   *                         </td>
2852
   *                         </tr>
2853
   *                         <tr valign="top">
2854
   *                         <td><b>ENT_XHTML</b></td>
2855
   *                         <td>
2856
   *                         Handle code as XHTML.
2857
   *                         </td>
2858
   *                         </tr>
2859
   *                         <tr valign="top">
2860
   *                         <td><b>ENT_HTML5</b></td>
2861
   *                         <td>
2862
   *                         Handle code as HTML 5.
2863
   *                         </td>
2864
   *                         </tr>
2865
   *                         </table>
2866
   *                         </p>
2867
   * @param string $encoding [optional] <p>
2868
   *                         Encoding to use.
2869
   *                         </p>
2870
   *
2871
   * @return string the decoded string.
2872
   */
2873
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to decode: empty string or no entity-start at all
    if ($str === '' || strpos($str, '&') === false) {
      return $str;
    }

    self::checkForSupport();

    $encoding = self::normalizeEncoding($encoding);

    if ($flags === null) {
      $flags = ENT_COMPAT;

      // "ENT_HTML5" is only used for PHP >= 5.4
      if (Bootup::is_php('5.4') === true) {
        $flags |= ENT_HTML5;
      }
    }

    // Decodes one decimal entity, but leaves encoded quotes as they are
    // (presumably so that "$flags" decide about them in "html_entity_decode()" below).
    $decodeDecimalEntity = function ($matches) {
      $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // repeat until a pass does not change the string anymore (multiple encoded input)
    do {
      $previous = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", $decodeDecimalEntity, $str);

      // add the missing semicolon to numeric & UTF16 two byte entities ...
      $str = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      // ... and decode them
      $str = html_entity_decode($str, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2921
2922
  /**
2923
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2924
   *
2925
   * @link http://php.net/manual/en/function.htmlentities.php
2926
   *
2927
   * @param string $str           <p>
2928
   *                              The input string.
2929
   *                              </p>
2930
   * @param int    $flags         [optional] <p>
2931
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2932
   *                              invalid code unit sequences and the used document type. The default is
2933
   *                              ENT_COMPAT | ENT_HTML401.
2934
   *                              <table>
2935
   *                              Available <i>flags</i> constants
2936
   *                              <tr valign="top">
2937
   *                              <td>Constant Name</td>
2938
   *                              <td>Description</td>
2939
   *                              </tr>
2940
   *                              <tr valign="top">
2941
   *                              <td><b>ENT_COMPAT</b></td>
2942
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2943
   *                              </tr>
2944
   *                              <tr valign="top">
2945
   *                              <td><b>ENT_QUOTES</b></td>
2946
   *                              <td>Will convert both double and single quotes.</td>
2947
   *                              </tr>
2948
   *                              <tr valign="top">
2949
   *                              <td><b>ENT_NOQUOTES</b></td>
2950 2
   *                              <td>Will leave both double and single quotes unconverted.</td>
2951
   *                              </tr>
2952 2
   *                              <tr valign="top">
2953
   *                              <td><b>ENT_IGNORE</b></td>
2954
   *                              <td>
2955
   *                              Silently discard invalid code unit sequences instead of returning
2956
   *                              an empty string. Using this flag is discouraged as it
2957
   *                              may have security implications.
2958
   *                              </td>
2959
   *                              </tr>
2960
   *                              <tr valign="top">
2961
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2962
   *                              <td>
2963
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2964
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2965
   *                              </td>
2966
   *                              </tr>
2967
   *                              <tr valign="top">
2968
   *                              <td><b>ENT_DISALLOWED</b></td>
2969
   *                              <td>
2970
   *                              Replace invalid code points for the given document type with a
2971
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2972
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2973
   *                              instance, to ensure the well-formedness of XML documents with
2974
   *                              embedded external content.
2975
   *                              </td>
2976
   *                              </tr>
2977
   *                              <tr valign="top">
2978
   *                              <td><b>ENT_HTML401</b></td>
2979
   *                              <td>
2980
   *                              Handle code as HTML 4.01.
2981
   *                              </td>
2982
   *                              </tr>
2983
   *                              <tr valign="top">
2984
   *                              <td><b>ENT_XML1</b></td>
2985
   *                              <td>
2986
   *                              Handle code as XML 1.
2987
   *                              </td>
2988
   *                              </tr>
2989
   *                              <tr valign="top">
2990
   *                              <td><b>ENT_XHTML</b></td>
2991
   *                              <td>
2992
   *                              Handle code as XHTML.
2993
   *                              </td>
2994
   *                              </tr>
2995
   *                              <tr valign="top">
2996
   *                              <td><b>ENT_HTML5</b></td>
2997
   *                              <td>
2998
   *                              Handle code as HTML 5.
2999
   *                              </td>
3000
   *                              </tr>
3001
   *                              </table>
3002
   *                              </p>
3003
   * @param string $encoding      [optional] <p>
3004
   *                              Like <b>htmlspecialchars</b>,
3005
   *                              <b>htmlentities</b> takes an optional third argument
3006
   *                              <i>encoding</i> which defines encoding used in
3007
   *                              conversion.
3008
   *                              Although this argument is technically optional, you are highly
3009
   *                              encouraged to specify the correct value for your code.
3010
   *                              </p>
3011
   * @param bool   $double_encode [optional] <p>
3012
   *                              When <i>double_encode</i> is turned off PHP will not
3013
   *                              encode existing html entities. The default is to convert everything.
3014
   *                              </p>
3015
   *
3016
   *
3017
   * @return string the encoded string.
3018
   * </p>
3019
   * <p>
3020
   * If the input <i>string</i> contains an invalid code unit
3021
   * sequence within the given <i>encoding</i> an empty string
3022
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3023
   * <b>ENT_SUBSTITUTE</b> flags are set.
3024
   */
3025
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $encoding = self::normalizeEncoding($encoding);

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra step below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // collect every char with 3 or more bytes (once) and replace it by its numbered entity
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $position);

      // already collected
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
3051
3052
  /**
3053
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3054
   *
3055
   * INFO: Take a look at "UTF8::htmlentities()"
3056
   *
3057
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3058
   *
3059
   * @param string $str           <p>
3060
   *                              The string being converted.
3061
   *                              </p>
3062 1
   * @param int    $flags         [optional] <p>
3063
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3064 1
   *                              invalid code unit sequences and the used document type. The default is
3065
   *                              ENT_COMPAT | ENT_HTML401.
3066
   *                              <table>
3067
   *                              Available <i>flags</i> constants
3068
   *                              <tr valign="top">
3069
   *                              <td>Constant Name</td>
3070
   *                              <td>Description</td>
3071
   *                              </tr>
3072 1
   *                              <tr valign="top">
3073
   *                              <td><b>ENT_COMPAT</b></td>
3074 1
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3075
   *                              </tr>
3076
   *                              <tr valign="top">
3077
   *                              <td><b>ENT_QUOTES</b></td>
3078
   *                              <td>Will convert both double and single quotes.</td>
3079
   *                              </tr>
3080
   *                              <tr valign="top">
3081
   *                              <td><b>ENT_NOQUOTES</b></td>
3082
   *                              <td>Will leave both double and single quotes unconverted.</td>
3083
   *                              </tr>
3084
   *                              <tr valign="top">
3085
   *                              <td><b>ENT_IGNORE</b></td>
3086
   *                              <td>
3087
   *                              Silently discard invalid code unit sequences instead of returning
3088
   *                              an empty string. Using this flag is discouraged as it
3089
   *                              may have security implications.
3090
   *                              </td>
3091
   *                              </tr>
3092
   *                              <tr valign="top">
3093
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3094
   *                              <td>
3095
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3096
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
3097
   *                              </td>
3098
   *                              </tr>
3099
   *                              <tr valign="top">
3100
   *                              <td><b>ENT_DISALLOWED</b></td>
3101
   *                              <td>
3102
   *                              Replace invalid code points for the given document type with a
3103 1
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3104
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3105 1
   *                              instance, to ensure the well-formedness of XML documents with
3106
   *                              embedded external content.
3107
   *                              </td>
3108
   *                              </tr>
3109
   *                              <tr valign="top">
3110
   *                              <td><b>ENT_HTML401</b></td>
3111
   *                              <td>
3112
   *                              Handle code as HTML 4.01.
3113
   *                              </td>
3114
   *                              </tr>
3115 1
   *                              <tr valign="top">
3116
   *                              <td><b>ENT_XML1</b></td>
3117 1
   *                              <td>
3118
   *                              Handle code as XML 1.
3119
   *                              </td>
3120
   *                              </tr>
3121
   *                              <tr valign="top">
3122
   *                              <td><b>ENT_XHTML</b></td>
3123
   *                              <td>
3124
   *                              Handle code as XHTML.
3125
   *                              </td>
3126
   *                              </tr>
3127 1
   *                              <tr valign="top">
3128
   *                              <td><b>ENT_HTML5</b></td>
3129 1
   *                              <td>
3130
   *                              Handle code as HTML 5.
3131
   *                              </td>
3132
   *                              </tr>
3133
   *                              </table>
3134
   *                              </p>
3135
   * @param string $encoding      [optional] <p>
3136
   *                              Defines encoding used in conversion.
3137
   *                              </p>
3138
   *                              <p>
3139
   *                              For the purposes of this function, the encodings
3140
   *                              ISO-8859-1, ISO-8859-15,
3141
   *                              UTF-8, cp866,
3142
   *                              cp1251, cp1252, and
3143
   *                              KOI8-R are effectively equivalent, provided the
3144
   *                              <i>string</i> itself is valid for the encoding, as
3145
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3146
   *                              the same positions in all of these encodings.
3147
   *                              </p>
3148
   * @param bool   $double_encode [optional] <p>
3149
   *                              When <i>double_encode</i> is turned off PHP will not
3150
   *                              encode existing html entities, the default is to convert everything.
3151
   *                              </p>
3152
   *
3153
   * @return string The converted string.
3154
   * </p>
3155
   * <p>
3156
   * If the input <i>string</i> contains an invalid code unit
3157
   * sequence within the given <i>encoding</i> an empty string
3158
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3159
   * <b>ENT_SUBSTITUTE</b> flags are set.
3160
   */
3161
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The encoding name goes through normalizeEncoding() first; everything
    // else is handed to PHP's htmlspecialchars() untouched.
    $normalizedEncoding = self::normalizeEncoding($encoding);
    $converted = htmlspecialchars($str, $flags, $normalizedEncoding, $double_encode);

    return $converted;
  }
3167
3168
  /**
   * Reports whether the "iconv" extension is loaded.
   *
   * @return   bool True if the extension is loaded, False otherwise
   */
  public static function iconv_loaded()
  {
    if (extension_loaded('iconv')) {
      return true;
    }

    return false;
  }
3177
3178
  /**
   * Converts an integer to its hexadecimal code point notation, e.g. 65 => "U+0041".
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param    int    $int  The integer to convert. Its string form must consist of decimal digits only,
   *                        so negative numbers and non-numeric strings are rejected.
   * @param    string $pfix Prefix that is put in front of the hex digits.
   *
   * @return   string The prefix followed by the lowercase hex digits, left-padded with zeros
   *                  to at least four digits, or an empty string on failure.
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Guard clause: reject everything that is not a plain run of decimal digits.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    $hexDigits = dechex((int)$int);

    return $pfix . str_pad($hexDigits, 4, '0', STR_PAD_LEFT);
  }
3200
3201
  /**
   * Reports whether the "IntlChar" class can be used.
   *
   * @return   bool True if Bootup::is_php('7.0') reports true and the class "IntlChar" exists,
   *                False otherwise
   */
  public static function intlChar_loaded()
  {
    // The class lookup is only attempted when the version check passes.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar');
  }
3210
3211
  /**
   * Reports whether the "intl" extension is loaded.
   *
   * @return   bool True if the extension is loaded, False otherwise
   */
  public static function intl_loaded()
  {
    if (extension_loaded('intl')) {
      return true;
    }

    return false;
  }
3220
3221
  /**
   * Alias of "UTF8::is_ascii()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str The string to check.
   *
   * @return boolean
   */
  public static function isAscii($str)
  {
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
3234 4
3235 4
  /**
   * Alias of "UTF8::is_base64()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_base64()
   *
   * @param string $str The string to check.
   *
   * @return bool
   */
  public static function isBase64($str)
  {
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
3248
3249
  /**
   * Alias of "UTF8::is_binary()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_binary()
   *
   * @param string $str The input to check.
   *
   * @return bool
   */
  public static function isBinary($str)
  {
    $isBinary = self::is_binary($str);

    return $isBinary;
  }
3262
3263
  /**
   * Alias of "UTF8::is_bom()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr The string to compare against the known byte order marks.
   *
   * @return boolean
   */
  public static function isBom($utf8_chr)
  {
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
3276
3277
  /**
   * Alias of "UTF8::is_html()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_html()
   *
   * @param string $str The string to check.
   *
   * @return boolean
   */
  public static function isHtml($str)
  {
    $isHtml = self::is_html($str);

    return $isHtml;
  }
3290 2
3291 2
  /**
   * Alias of "UTF8::is_json()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_json()
   *
   * @param string $str The string to check.
   *
   * @return bool
   */
  public static function isJson($str)
  {
    $isJson = self::is_json($str);

    return $isJson;
  }
3304
3305 2
  /**
   * Alias of "UTF8::is_utf16()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str The string to check.
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function isUtf16($str)
  {
    $utf16Type = self::is_utf16($str);

    return $utf16Type;
  }
3318 2
3319
  /**
   * Alias of "UTF8::is_utf32()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str The string to check.
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function isUtf32($str)
  {
    $utf32Type = self::is_utf32($str);

    return $utf32Type;
  }
3332
3333
  /**
   * Alias of "UTF8::is_utf8()": forwards both arguments and returns the result unchanged.
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    The string to check.
   * @param bool   $strict Passed through to UTF8::is_utf8().
   *
   * @return bool
   */
  public static function isUtf8($str, $strict = false)
  {
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
3347 2
3348 2
  /**
   * Checks whether a string consists of 7 bit ASCII bytes only.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if no byte in the range 0x80-0xFF was found<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    // A single byte with the high bit set is enough to rule out ASCII.
    $highBitFound = preg_match('/[\x80-\xFF]/', $str);

    return !$highBitFound;
  }
3360 2
3361 2
  /**
   * Checks whether a string is base64 encoded.
   *
   * The input counts as base64 when strictly decoding it and encoding the
   * result again reproduces the input exactly.
   *
   * @param string $str The string to check (cast to string first).
   *
   * @return bool Whether or not $str is base64 encoded; always false for an empty string
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
3382
3383 2
  /**
   * Check if the input is binary... (is look like a hack).
   *
   * The input is reported as binary when it is a non-empty run of only "0"
   * and "1" characters, or when it contains at least one NUL byte.
   *
   * NOTE(review): the previous version had a third test,
   * substr_count($input, '^ -~') / strlen($input) > 0.3, which counted the
   * literal 4-byte text "^ -~" (presumably meant as a character class).
   * Non-overlapping occurrences of a 4-byte needle can never exceed 25% of
   * the length, so that test could never succeed and has been removed.
   *
   * @param mixed $input The value to check (cast to string first).
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    $input = (string)$input;

    // Bit strings such as "0101" are treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary.
    return substr_count($input, "\x00") > 0;
  }
3407
3408
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to
   * UTF8::is_binary(). A file that cannot be opened or read is checked as an
   * empty string.
   *
   * @param string $file Path of the file to inspect.
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'r');

      // fopen() signals failure by returning false, not by throwing, so the
      // handle must be checked before it is used.
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        fclose($fp);
        $fp = false;

        // fread() returns false on failure as well.
        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Reached only if an error handler turns PHP warnings into exceptions.
      if (is_resource($fp)) {
        fclose($fp);
      }

      $block = '';
    }

    return self::is_binary($block);
  }
3427 32
3428
  /**
   * Checks if the given string is equal to any "Byte Order Mark".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param    string $str The input string; it must match a key of self::$bom exactly.
   *
   * @return   bool True if $str is a Byte Order Mark, False otherwise.
   */
  public static function is_bom($str)
  {
    // The candidates are the keys of self::$bom; the values are not used here.
    $knownBoms = array_keys(self::$bom);

    return in_array($str, $knownBoms, true);
  }
3447 6
3448 12
  /**
   * Check if the string contains any html-tags <lall>.
   *
   * @param string $str The string to check (cast to string first).
   *
   * @return boolean True if at least one opening, closing or self-closing tag was found;
   *                 always false for an empty string
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $tagPattern = "/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/";

    // A single match is enough; a failed or empty match yields false.
    return preg_match($tagPattern, $str) === 1;
  }
3474
3475
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that UTF8::json_decode() turns into an object, without
   * json_last_error() reporting a problem, is accepted.
   *
   * @param string $str The string to check (cast to string first).
   *
   * @return bool Always false for an empty string
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // json_last_error() is only consulted when the decoded value is an object.
    if (!is_object(self::json_decode($str))) {
      return false;
    }

    return json_last_error() === JSON_ERROR_NONE;
  }
3500 28
3501 5
  /**
   * Check if the string is UTF-16.
   *
   * The BOM is removed first. Only input that UTF8::is_binary() reports as
   * binary is examined: for each byte order the string is converted to UTF-8,
   * back, and to UTF-8 again; if that round trip is stable, a score is built
   * from the UTF8::count_chars() results. The byte order with the higher
   * score wins; equal scores mean "not UTF-16".
   *
   * @param string $str The string to check.
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    // Little endian is examined first, then big endian.
    foreach (array('UTF-16LE', 'UTF-16BE') as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $asUtf8Again = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($asUtf8Again !== $asUtf8) {
        continue;
      }

      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($asUtf8Again, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
3557
3558 2
  /**
   * Check if the string is UTF-32.
   *
   * The BOM is removed first. Only input that UTF8::is_binary() reports as
   * binary is examined: for each byte order the string is converted to UTF-8,
   * back, and to UTF-8 again; if that round trip is stable, a score is built
   * from the UTF8::count_chars() results. The byte order with the higher
   * score wins; equal scores mean "not UTF-32".
   *
   * @param string $str The string to check.
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    // Little endian is examined first, then big endian.
    foreach (array('UTF-32LE', 'UTF-32BE') as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $asUtf8Again = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($asUtf8Again !== $asUtf8) {
        continue;
      }

      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($asUtf8Again, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
3614 1
3615
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * An empty string counts as valid. Overlong forms, surrogates, code points
   * above U+10FFFF, 5- and 6-octet sequences, stray continuation octets and
   * sequences that are cut off at the end of the string are rejected.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param  string $str    The string to be checked (cast to string first).
   * @param  bool   $strict Check also if the string is not UTF-16 or UTF-32.
   *
   * @return bool
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      //
      // NOTE(review): this branch is taken when pcre_utf8_support() is NOT
      // true, yet it relies on the /u modifier - the condition looks
      // inverted. Left unchanged; confirm against pcre_utf8_support().
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0; // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the first octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence. This is always illegal, but
          // rather than trying to resynchronize, the sequence is consumed
          // and rejected by the length check at its end.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only a continuation octet is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the sequence: $mUcs4 holds the complete code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A sequence that is still open here was cut off by the end of the
    // string (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
3759
3760
  /**
3761
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3762
   * Decodes a JSON string
3763
   *
3764 2
   * @link http://php.net/manual/en/function.json-decode.php
3765
   *
3766 2
   * @param string $json    <p>
3767 2
   *                        The <i>json</i> string being decoded.
3768
   *                        </p>
3769 2
   *                        <p>
3770 1
   *                        This function only works with UTF-8 encoded strings.
3771 1
   *                        </p>
3772 1
   *                        <p>PHP implements a superset of
3773
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3774 2
   *                        only supports these values when they are nested inside an array or an object.
3775
   *                        </p>
3776
   * @param bool   $assoc   [optional] <p>
3777
   *                        When <b>TRUE</b>, returned objects will be converted into
3778
   *                        associative arrays.
3779
   *                        </p>
3780
   * @param int    $depth   [optional] <p>
3781
   *                        User specified recursion depth.
3782
   *                        </p>
3783
   * @param int    $options [optional] <p>
3784
   *                        Bitmask of JSON decode options. Currently only
3785
   *                        <b>JSON_BIGINT_AS_STRING</b>
3786 8
   *                        is supported (default is to cast large integers as floats)
3787
   *                        </p>
3788 8
   *
3789 8
   * @return mixed the value encoded in <i>json</i> in appropriate
3790
   * PHP type. Values true, false and
3791 8
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3792
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3793 8
   * <i>json</i> cannot be decoded or if the encoded
3794
   * data is deeper than the recursion limit.
3795 2
   */
3796
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through filter() before decoding.
    $filtered = self::filter($json);

    // $options is only handed to PHP when Bootup::is_php('5.4') reports true.
    if (Bootup::is_php('5.4') === true) {
      return json_decode($filtered, $assoc, $depth, $options);
    }

    return json_decode($filtered, $assoc, $depth);
  }
3808 1
3809
  /**
3810 8
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3811 8
   * Returns the JSON representation of a value.
3812
   *
3813 8
   * @link http://php.net/manual/en/function.json-encode.php
3814
   *
3815
   * @param mixed $value   <p>
3816
   *                       The <i>value</i> being encoded. Can be any type except
3817
   *                       a resource.
3818
   *                       </p>
3819
   *                       <p>
3820
   *                       All string data must be UTF-8 encoded.
3821
   *                       </p>
3822
   *                       <p>PHP implements a superset of
3823
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3824
   *                       only supports these values when they are nested inside an array or an object.
3825
   *                       </p>
3826 1
   * @param int   $options [optional] <p>
3827
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3828 1
   *                       <b>JSON_HEX_TAG</b>,
3829 1
   *                       <b>JSON_HEX_AMP</b>,
3830
   *                       <b>JSON_HEX_APOS</b>,
3831
   *                       <b>JSON_NUMERIC_CHECK</b>,
3832
   *                       <b>JSON_PRETTY_PRINT</b>,
3833
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3834
   *                       <b>JSON_FORCE_OBJECT</b>,
3835
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3836
   *                       constants is described on
3837
   *                       the JSON constants page.
3838
   *                       </p>
3839
   * @param int   $depth   [optional] <p>
3840
   *                       Set the maximum depth. Must be greater than zero.
3841
   *                       </p>
3842 1
   *
3843
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3844 1
   */
3845
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // the value is passed through self::filter() before it reaches the native encoder
    $filtered = self::filter($value);

    // the "$depth" argument is only handed to the native function on PHP >= 5.5
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3857 15
3858 2
  /**
3859
   * Makes string's first char lowercase.
3860
   *
3861 14
   * @param    string $str The input string
3862 14
   *
3863
   * @return   string The resulting string
3864 14
   */
3865 2
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // nothing to convert for an empty input
    if (!isset($str[0])) {
      return '';
    }

    $firstChar = self::substr($str, 0, 1);

    // self::substr() can report a failure via "false"; in that case the
    // input is returned untouched instead of passing "false" to strtolower()
    if ($firstChar === false || $firstChar === '') {
      return $str;
    }

    // lowercase only the first character and append the unchanged remainder
    return self::strtolower($firstChar) . (string)self::substr($str, 1);
  }
3869 7
3870
  /**
3871
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3872 12
   *
3873 8
   * @param  string $str   The string to be trimmed
3874
   * @param  string $chars Optional characters to be stripped
3875
   *
3876 10
   * @return string The string with unwanted characters stripped from the left
3877
   */
3878 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // Without a character list, leading Unicode separators (\pZ) and "other" characters (\pC)
    // are stripped. The string "0" is a valid character list and must not count as "empty".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    // build a regex character class from the given characters and strip it from the left
    $charClass = self::rxClass($chars);

    return preg_replace("/^{$charClass}+/u", '', $str);
  }
3895
3896
  /**
3897 1
   * Returns the UTF-8 character with the maximum code point in the given data.
3898
   *
3899
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3900 1
   *
3901
   * @return   string The character with the highest code point.
3902 1
   */
3903 View Code Duplication
  public static function max($arg)
  {
    // an array of strings is handled like one concatenated string
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    // NOTE(review): assumes self::codepoints() returns a list of integer code points - confirm
    $codepoints = self::codepoints($arg);

    // the native max() must not be called with an empty list
    // (warning on older PHP versions, ValueError on PHP >= 8)
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3911
3912 33
  /**
3913
   * Calculates and returns the maximum number of bytes taken by any
3914
   * UTF-8 encoded character in the given string.
3915 33
   *
3916
   * @param  string $str The original Unicode string.
3917
   *
3918
   * @return int Max byte lengths of the given chars.
3919
   */
3920
  public static function max_chr_width($str)
  {
    // list of values produced by self::chr_size_list() for the given string
    $sizes = self::chr_size_list($str);

    // an empty list means there is nothing to measure
    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3929 1
3930
  /**
3931
   * checks whether mbstring is available on the server
3932 1
   *
3933
   * @return   bool True if available, False otherwise
3934 1
   */
3935
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    // side effect: switch the internal encoding of mbstring to UTF-8
    \mb_internal_encoding('UTF-8');

    return true;
  }
3945
3946 1
  /**
3947
   * Returns the UTF-8 character with the minimum code point in the given data.
3948
   *
3949 1
   * @param  mixed $arg A UTF-8 encoded string or an array of such strings.
3950
   *
3951
   * @return string The character with the lowest code point.
3952 1
   */
3953 View Code Duplication
  public static function min($arg)
  {
    // an array of strings is handled like one concatenated string
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    // NOTE(review): assumes self::codepoints() returns a list of integer code points - confirm
    $codepoints = self::codepoints($arg);

    // the native min() must not be called with an empty list
    // (warning on older PHP versions, ValueError on PHP >= 8)
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3961 1
3962 1
  /**
3963
   * alias for "UTF8::normalize_encoding()"
3964
   *
3965
   * @see UTF8::normalize_encoding()
3966
   *
3967
   * @param string $encoding
3968
   *
3969
   * @return string
3970
   */
3971
  public static function normalizeEncoding($encoding)
  {
    // pure alias: all work is delegated to normalize_encoding()
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3975 7
3976
  /**
   * Map a loosely written encoding name onto a canonical spelling.
   *
   * "UTF-8" and names listed in self::$iconvEncoding are returned unchanged. Every other
   * name is upper-cased and, when it matches a known alias after removing punctuation,
   * replaced by the canonical name (e.g. "utf8" => "UTF-8"). Results are memoized per input.
   *
   * @param  string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string|false e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.,<br />
   *                      or false if no encoding name was given.
   */
  public static function normalize_encoding($encoding)
  {
    static $cache = array();

    if (!$encoding) {
      return false;
    }

    // fast paths: already canonical, or a name that is known to iconv
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($cache[$encoding])) {
      return $cache[$encoding];
    }

    $upper = strtoupper($encoding);

    // lookup key: everything except letters, digits and whitespace is removed
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // unknown names fall back to their upper-cased form
    $normalized = empty($aliases[$lookupKey]) ? $upper : $aliases[$lookupKey];

    // memoize under the original (not upper-cased) input
    $cache[$encoding] = $normalized;

    return $normalized;
  }
4032 1
4033 1
  /**
4034 1
   * Normalize some MS Word special characters.
4035
   *
4036 1
   * @param string $str The string to be normalized.
4037
   *
4038
   * @return string
4039
   */
4040
  public static function normalize_msword($str)
  {
    // search / replace lists are derived from self::$utf8MSWord once and then reused
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $table = self::$utf8MSWord;

      $search = array_keys($table);
      $replace = array_values($table);
    }

    return str_replace($search, $replace, $str);
  }
4052 36
4053
  /**
4054
   * Normalize the whitespace.
4055 36
   *
4056
   * @param string $str                     The string to be normalized.
4057
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
4058
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
4059 36
   *
4060 36
   * @return string
4061 36
   */
4062 36
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // Only a strict "true" keeps non-breaking spaces. The cache slot is derived from the
    // very same strict test, so a truthy non-boolean value (e.g. 1) can no longer store the
    // full table in the slot that later calls with "true" rely on.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($whitespaces[$cacheKey])) {

      $whitespaces[$cacheKey] = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($whitespaces[$cacheKey]['NO-BREAK SPACE']);
      }

      // only the characters themselves are needed for str_replace()
      $whitespaces[$cacheKey] = array_values($whitespaces[$cacheKey]);
    }

    // bidirectional control characters are removed unless the caller asks to keep them
    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    // every remaining whitespace variant becomes a plain space
    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
4091 36
4092
  /**
4093 36
   * Format a number with grouped thousands.
4094
   *
4095
   * @param float  $number
4096
   * @param int    $decimals
4097
   * @param string $dec_point
4098
   * @param string $thousands_sep
4099
   *
4100
   * @deprecated
4101
   *
4102
   * @return string
4103
   */
4104
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // The native function only supports multi-byte separators since PHP 5.4. On older
    // versions the number is formatted with single-byte placeholders first, and the
    // placeholders are swapped afterwards. This is needed as soon as ONE of the two
    // separators is longer than one byte.
    if (
        (isset($thousands_sep[1]) || isset($dec_point[1]))
        &&
        Bootup::is_php('5.4') !== true
    ) {
      // strtr() replaces both placeholders in a single pass, so a separator that itself
      // contains "." or "," is never replaced a second time
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
4129 40
4130
  /**
4131 40
   * Calculates Unicode code point of the given UTF-8 encoded character.
4132
   *
4133 40
   * INFO: opposite to UTF8::chr()
4134 30
   *
4135
   * @param  string $chr The character of which to calculate code point.
4136
   *
4137 16
   * @return int Unicode code point of the given character,<br />
4138
   *         0 on invalid UTF-8 byte sequence.
4139 16
   */
4140 15
  public static function ord($chr)
  {
    // empty input (but not the character "0") has no code point
    if (!$chr && $chr !== '0') {
      return 0;
    }

    // init
    self::checkForSupport();

    // use IntlChar when it is flagged as available and delivers a usable result
    if (self::$support['intlChar'] === true) {
      $codePoint = \IntlChar::ord($chr);
      if ($codePoint) {
        return $codePoint;
      }
    }

    // manual decoding: look at (at most) the first four bytes, indexed from 1
    $bytes = unpack('C*', substr($chr, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // lead byte of a four-byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // lead byte of a three-byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // lead byte of a two-byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // otherwise the first byte value itself is returned
    return $lead;
  }
4173
4174
  /**
4175
   * Parses the string into an array (into the second parameter).
4176
   *
4177
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
4178
   *          if the second parameter is not set!
4179
   *
4180
   * @link http://php.net/manual/en/function.parse-str.php
4181 2
   *
4182
   * @param string $str     <p>
4183 2
   *                        The input string.
4184 1
   *                        </p>
4185
   * @param array  $result  <p>
4186
   *                        The result will be returned into this reference parameter.
4187 2
   *                        </p>
4188
   *
4189
   * @return bool will return false if php can't parse the string and we haven't any $result
4190
   */
4191
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    // the input is passed through self::clean() before it is parsed into "$result"
    $parsed = \mb_parse_str(self::clean($str), $result);

    // success requires both: no parser failure and a non-empty result
    return $parsed !== false && !empty($result);
  }
4205
4206
  /**
4207
   * Checks if the "u" pattern modifier is available, which enables Unicode support in PCRE.
4208 24
   *
4209 24
   * @return   bool True if support is available, false otherwise
4210 24
   */
4211
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier is matched against an empty subject;
    // errors are silenced and the outcome is reduced to a boolean
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
4216
4217
  /**
4218 24
   * Create an array containing a range of UTF-8 characters.
4219 24
   *
4220 24
   * @param  mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
4221 24
   * @param  mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
4222 24
   *
4223
   * @return array
4224 24
   */
4225
  public static function range($var1, $var2)
  {
    // both boundaries are required
    if (!$var1 || !$var2) {
      return array();
    }

    // lower boundary: decimal digits first, then hexadecimal digits, otherwise a character
    if (ctype_digit((string)$var1)) {
      $first = (int)$var1;
    } elseif (ctype_xdigit($var1)) {
      $first = (int)self::hex_to_int($var1);
    } else {
      $first = self::ord($var1);
    }

    if (!$first) {
      return array();
    }

    // upper boundary: resolved by the same rules
    if (ctype_digit((string)$var2)) {
      $last = (int)$var2;
    } elseif (ctype_xdigit($var2)) {
      $last = (int)self::hex_to_int($var2);
    } else {
      $last = self::ord($var2);
    }

    if (!$last) {
      return array();
    }

    // turn every code point of the numeric range into its character
    $codePoints = range($first, $last);

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        $codePoints
    );
  }
4263
4264
  /**
4265
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
4266 24
   *
4267
   * @param string $str
4268
   *
4269
   * @return string
4270
   */
4271
  public static function remove_bom($str)
  {
    // self::$bom maps each BOM byte sequence to the number of bytes to cut off
    foreach (self::$bom as $bomString => $bomByteLength) {
      // only a BOM at the very beginning of the string is removed
      if (strpos($str, $bomString) !== 0) {
        continue;
      }

      $str = substr($str, $bomByteLength);
    }

    return $str;
  }
4281
4282
  /**
4283
   * alias for "UTF8::remove_bom()"
4284 3
   *
4285 2
   * @see UTF8::remove_bom()
4286 1
   *
4287 2
   * @param string $str
4288 1
   *
4289 2
   * @return string
4290
   */
4291 2
  public static function removeBOM($str)
  {
    // pure alias: all work is delegated to remove_bom()
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4295
4296
  /**
4297
   * Removes duplicate occurrences of a string in another string.
4298
   *
4299
   * @param    string       $str  The base string
4300 3
   * @param    string|array $what String to search for in the base string
4301 1
   *
4302
   * @return   string The result string with removed duplicates
4303
   */
4304
  public static function remove_duplicates($str, $what = ' ')
  {
    // a single needle is handled like a list with one entry
    $needles = is_string($what) ? array($what) : $what;

    // anything that is neither a string nor an array leaves the input untouched
    if (!is_array($needles)) {
      return $str;
    }

    foreach ($needles as $needle) {
      // collapse every run of the needle into one single occurrence
      $pattern = '/(' . preg_quote($needle, '/') . ')+/';
      $str = preg_replace($pattern, $needle, $str);
    }

    return $str;
  }
4318
4319
  /**
4320 3
   * Remove Invisible Characters
4321 3
   *
4322 3
   * This prevents sandwiching null characters
4323 3
   * between ascii characters, like Java\0script.
4324
   *
4325
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4326
   *
4327
   * @param  string $str
4328
   * @param  bool   $url_encoded
4329
   * @param  string $replacement
4330
   *
4331
   * @return  string
4332
   */
4333
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // percent-encoded hex digits may be written in upper or lower case ("%0B" vs. "%0b"),
      // therefore these patterns have to be case-insensitive
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127, the raw byte is removed below as well
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is replaced anymore, because removing one sequence
    // can join the surrounding characters into a new one
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4353 13
4354
  /**
4355 13
   * replace diamond question mark (�)
4356
   *
4357
   * @param string $str
4358 13
   * @param string $unknown
4359 13
   *
4360 1
   * @return string
4361 1
   */
4362 12
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the replacement character, once as escaped byte sequence and once as literal
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // a single replacement string is applied to every entry of the search list
    return str_replace($search, $unknown, $str);
  }
4376
4377
  /**
4378
   * Strip whitespace or other characters from end of a UTF-8 string.
4379
   *
4380
   * @param    string $str   The string to be trimmed
4381 1
   * @param    string $chars Optional characters to be stripped
4382
   *
4383 1
   * @return   string The string with unwanted characters stripped from the right
4384
   */
4385 View Code Duplication
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // Without a character list, trailing Unicode separators (\pZ) and "other" characters (\pC)
    // are stripped. The string "0" is a valid character list and must not count as "empty".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    // build a regex character class from the given characters and strip it from the right
    $charClass = self::rxClass($chars);

    return preg_replace("/{$charClass}+$/u", '', $str);
  }
4402 1
4403
  /**
4404
   * rxClass
4405 1
   *
4406
   * @param string $s
4407
   * @param string $class
4408 1
   *
4409
   * @return string
4410
   */
4411
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    // the combination of both arguments identifies a cached result
    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // $parts[0] collects the content of the character class,
    // all further entries become separate alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $chunk) {
      if ('-' === $chunk) {
        // a hyphen goes to the very front of the class content
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($chunk[2])) {
        // at most two bytes: escaped for use inside the pattern
        $parts[0] .= preg_quote($chunk, '/');
      } elseif (1 === self::strlen($chunk)) {
        // longer byte sequence that self::strlen() counts as one: appended as it is
        $parts[0] .= $chunk;
      } else {
        // everything else is matched as an alternative of its own
        $parts[] = $chunk;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    // a lone character class is returned directly, several parts are wrapped in a group
    $return = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
4450
4451 2
  /**
4452
   * Echo native UTF8-Support libs, e.g. for debugging.
4453
   */
4454 2
  public static function showSupport()
  {
    // init: the other methods of this class call this before they read self::$support
    self::checkForSupport();

    // print the name of every entry together with its value; a bare flag echoed on its
    // own would only show up as "1" or as an empty line
    foreach (self::$support as $utf8SupportName => $utf8Support) {
      echo $utf8SupportName . ': ' . var_export($utf8Support, true) . "\n<br>";
    }
  }
4460
4461
  /**
4462
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4463
   *
4464
   * @param    string $char           The Unicode character to be encoded as numbered entity.
4465
   * @param    bool   $keepAsciiChars Keep ASCII chars.
4466
   *
4467
   * @return   string The HTML numbered entity.
4468
   */
4469
  public static function single_chr_html_encode($char, $keepAsciiChars = false)
  {
    // the character "0" is falsy in PHP but still a regular character,
    // only a really empty input yields an empty result (same guard as in ord())
    if (!$char && $char !== '0') {
      return '';
    }

    // ASCII characters are returned as they are, if the caller asks for it
    if (
        $keepAsciiChars === true
        &&
        self::isAscii($char) === true
    ) {
      return $char;
    }

    // numeric entity built from the code point that self::ord() reports
    return '&#' . self::ord($char) . ';';
  }
4485
4486
  /**
4487
   * Convert a string to an array of Unicode characters.
4488
   *
4489
   * @param    string  $str       The string to split into array.
4490
   * @param    int     $length    Max character length of each array element.
4491
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string.
4492
   *
4493
   * @return   array An array containing chunks of the string.
4494
   */
4495
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    self::checkForSupport();
    $ret = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // one match per character, the "s" modifier lets "." match newlines as well
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }

    } else {

      // fallback: walk over the bytes and group them by the bit pattern of the lead byte

      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // single byte
          $ret[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // two bytes, accepted only with a valid continuation byte
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $ret[] = $lead . $str[$pos + 1];

            $pos++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // three bytes
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $ret[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // four bytes
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $ret[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    // join the single characters into chunks of "$length" characters
    if ($length > 1) {
      $ret = array_map('implode', array_chunk($ret, $length));
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4565
4566
  /**
4567 17
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4568
   *
4569
   * @param string $str
4570 17
   *
4571
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
4572 17
   *                      otherwise it will return false.
4573
   */
4574
  public static function str_detect_encoding($str)
  {

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // every detector runs only once, its result (1 or 2) selects the byte order
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
        'UTF-8',
    );

    self::checkForSupport();

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the string from that encoding into the very
    // same encoding gives the input back unchanged. The strings are compared directly,
    // there is no need to hash both sides first.

    $strAsString = (string)$str;
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @iconv($encodingTmp, $encodingTmp, $str);
      if ($roundTrip !== false && $roundTrip === $strAsString) {
        return $encodingTmp;
      }
    }

    return false;
  }
4643
4644
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every needle is turned into a quoted, case-insensitive UTF-8 regex and
   * applied via "preg_replace()". The replacement is inserted literally:
   * "$" and "\" are escaped first, so sequences like "$1" or "\1" are NOT
   * treated as regex back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty needle never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string, or an array of replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed a string or an array of replacements.
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one regex per needle (no by-reference loop, so no dangling reference)
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // a pattern that can never match: an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // "preg_replace()" interprets "$n" and "\n" in the replacement as
    // back-references, "str_ireplace()" does not -> escape both characters
    $escape = function ($replacement) {
      return addcslashes((string)$replacement, '\\$');
    };
    $replace = is_array($replace) ? array_map($escape, $replace) : $escape($replace);

    return preg_replace($patterns, $replace, $subject, -1, $count);
  }
4688 3
4689
  /**
   * Limit the number of characters in a string, but cut at a word boundary.
   *
   * Strings that are not longer than "$length" are returned unchanged. Otherwise
   * the string is truncated to "$length" characters, the trailing (possibly
   * partial) word is dropped and "$strAddOn" is appended.
   *
   * @param  string $str
   * @param  int    $length
   * @param  string $strAddOn
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $length = (int)$length;
    if (self::strlen($str) <= $length) {
      return $str;
    }

    $beforeLimit = $length - 1;

    // the limit falls exactly behind a word: cut the blank and stop here
    if (self::substr($str, $beforeLimit, 1) === ' ') {
      return self::substr($str, 0, $beforeLimit) . $strAddOn;
    }

    // otherwise truncate and drop the last (possibly incomplete) word
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $wholeWords = implode(' ', $words);

    if ($wholeWords !== '') {
      return $wholeWords . $strAddOn;
    }

    // no blank inside the truncated part: fall back to a hard cut
    return self::substr($truncated, 0, $beforeLimit) . $strAddOn;
  }
4729
4730 2
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when "$pad_length" is not an integer, is
   * not positive, is smaller than the current length, or when "$pad_string"
   * is empty (there is nothing to pad with).
   *
   * @param    string $str        The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if (!$ps_length) {
      // an empty pad string would cause a division by zero below
      return $str;
    }

    $diff = $pad_length - $str_length;

    // number of padding characters on each side
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $left = $diff;
        $right = 0;
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the remainder
        $left = (int)floor($diff / 2);
        $right = $diff - $left;
        break;

      case STR_PAD_RIGHT:
      default:
        $left = 0;
        $right = $diff;
    }

    $pre = '';
    if ($left > 0) {
      // repeat often enough to cover the side, then trim to the exact length
      $pre = str_repeat($pad_string, (int)ceil($left / $ps_length));
      $pre = self::substr($pre, 0, $left);
    }

    $post = '';
    if ($right > 0) {
      $post = str_repeat($pad_string, (int)ceil($right / $ps_length));
      $post = self::substr($post, 0, $right);
    }

    return $pre . $str . $post;
  }
4775
4776 1
  /**
   * Repeat a string.
   *
   * The input is passed through "self::filter()" before it is handed to the
   * native "str_repeat()".
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4800
4801
  /**
   * INFO: this is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // delegate to the native function; $count is filled in by reference
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4835
4836 5
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via "self::split()", the resulting pieces are
   * shuffled with "shuffle()" and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4851
4852 2
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4877
4878
  /**
   * Convert a string to an array.
   *
   * The string is split with the grapheme-cluster regex of the Grapheme
   * polyfill; for "$len" > 1 consecutive matches are joined into chunks of
   * "$len" matches each. A "$len" below 1 is delegated to the native
   * "str_split()".
   *
   * @param string $str
   * @param int    $len
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    if ($len < 1) {
      return str_split($str, $len);
    }

    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    $chunks = array();
    $current = -1;

    foreach ($graphemes as $index => $grapheme) {
      if ($index % $len === 0) {
        // every $len-th match opens a new chunk
        $chunks[++$current] = $grapheme;
      } else {
        $chunks[$current] .= $grapheme;
      }
    }

    return $chunks;
  }
4917
4918
  /**
   * Get a binary representation of a specific string.
   *
   * Every byte is converted to its 8-bit representation and the results are
   * concatenated; leading zero bits of the whole result are stripped (an
   * empty or all-zero input yields "0"). The conversion works byte by byte
   * because "base_convert()" loses precision once the input exceeds a few
   * bytes.
   *
   * @param  string $str The input string.
   *
   * @return string
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      $bits .= str_pad(decbin(ord($str[$i])), 8, '0', STR_PAD_LEFT);
    }

    // same shape as a number converted to base 2: no leading zeros, but at least "0"
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4933
4934
  /**
   * Alias for "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    $ascii = self::to_ascii($str, $unknown);

    return $ascii;
  }
4948
4949
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split with a regex whose word class comes from
   * "self::rxClass($charlist, '\pL')"; words may be joined by dash
   * punctuation or apostrophes. If the split fails (e.g. the input is not
   * valid UTF-8) no words are reported.
   *
   * @param string $str    The input string.
   * @param int    $format <strong>0</strong> => return a number of words<br />
   *                       <strong>1</strong> => return an array of words
   *                       <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist
   *
   * @return array|int The number of words in the string or the list of words, depending on $format
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    if ($strParts === false) {
      // "preg_split()" failed: there is nothing that could be counted
      return ($format === 1 || $format === 2) ? array() : 0;
    }

    // the split result alternates: [non-word, word, non-word, word, ..., non-word]
    $len = count($strParts);

    if ($format === 1) {

      $numberOfWords = array();
      for ($i = 1; $i < $len; $i += 2) {
        $numberOfWords[] = $strParts[$i];
      }

    } elseif ($format === 2) {

      self::checkForSupport();

      $numberOfWords = array();
      // the offset is tracked in characters (via "self::strlen()")
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $numberOfWords[$offset] = $strParts[$i];
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

    } else {

      $numberOfWords = ($len - 1) / 2;

    }

    return $numberOfWords;
  }
4993
4994 65
  /**
   * Case-insensitive string comparison.
   *
   * Both strings are passed through "self::strtocasefold()" and then compared
   * with "self::strcmp()".
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5006
5007
  /**
   * String comparison.
   *
   * Identical strings short-circuit to 0; everything else is compared with the
   * native "strcmp()" after NFD normalization of both strings.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5024
5025
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str
   * @param string $charList
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null The length (in characters) of the segment before the first
   *                  character matching $charList, or null if $charList is empty.
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only cut the string when a non-default window was requested
    if ($offset || 2147483647 !== $length) {
      $str = self::substr($str, $offset, $length);
    }
    $str = (string)$str;

    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (!preg_match($pattern, $str, $match)) {
      // no character of the mask occurs: the whole string is the segment
      return self::strlen($str);
    }

    /** @noinspection OffsetOperationsInspection */
    return self::strlen($match[1]);
  }
5054
5055
  /**
   * Makes a UTF-8 string from code points.
   *
   * Every entry is converted with "self::chr()" and the results are concatenated.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
5074
5075
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The candidates are the keys of "self::$bom".
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5092
5093
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The string is passed through "self::clean()" before the native
   * "strip_tags()" is applied.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               You can use the optional second parameter to specify tags which should
   *                               not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
5119
5120
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching; null is treated as 0
   *                           </p>
   * @param string  $encoding
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false Return the numeric position of the first occurrence of
   *                   needle in the haystack string, or false if needle is not found
   *                   or if haystack / needle is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalizeEncoding($encoding);
    }

    // the default offset is null, but "\mb_stripos()" expects an integer
    return \mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
5169
5170
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * The search is delegated to "\mb_stristr()" with UTF-8 as encoding.
   *
   * @param string $str
   * @param string $needle
   * @param bool   $before_needle
   *
   * @return false|string false if $needle is empty, otherwise the result of "\mb_stristr()"
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = (string)$needle;
    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    return \mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
5190 1
5191
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Set the charset for e.g. "\mb_" function
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int the number of characters in
   *           string str having character encoding
   *           encoding. A multi-byte character is
   *           counted as 1.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    // INFO: this is only a fallback for old versions
    $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

    // for these encodings the native byte-length is returned directly
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        return strlen($str);
    }

    self::checkForSupport();

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5234
5235 1
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are passed through "self::strtocasefold()" and then compared
   * with "self::strnatcmp()".
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>< 0</strong> if str1 is less than str2<br />
   *             <strong>> 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5249 1
5250
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Identical strings short-circuit to 0; everything else is passed through
   * "self::strtonatfold()" and compared with the native "strnatcmp()".
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   *
   * @return int Similar to other string comparison functions, this one returns &lt; 0 if
   * str1 is less than str2; &gt;
   * 0 if str1 is greater than
   * str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5273
5274
  /**
   * Case-insensitive string comparison of the first n characters
   *
   * Both strings are passed through "self::strtocasefold()" and then compared
   * with "self::strncmp()".
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     The length of strings to be used in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i> is
   * greater than <i>str2</i>, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5299 1
5300 1
  /**
   * String comparison of the first n characters
   *
   * Both strings are cut to their first "$len" characters with
   * "self::substr()" and the results are compared with "self::strcmp()".
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     Number of characters to use in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i>
   * is greater than <i>str2</i>, and 0 if they are
   * equal.
   */
  public static function strncmp($str1, $str2, $len)
  {
    // cast to string so that "self::strcmp()" never receives a non-string
    // (e.g. false) result from "self::substr()"
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5326
5327
  /**
   * Search a string for any of a set of characters
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>
   *                          The string where char_list is looked for.
   *                          </p>
   * @param string $char_list <p>
   *                          This parameter is case sensitive.
   *                          </p>
   *
   * @return string|false a string starting from the character found, or false if it is
   * not found (or if haystack / char_list is empty).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $match)) {
      return false;
    }

    // byte offset of the first matched character
    $start = strpos($haystack, $match[0]);

    return substr($haystack, $start);
  }
5358
5359
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack     <p>
   *                              The string being checked.
   *                              </p>
   * @param string  $needle       <p>
   *                              The string to find in haystack. It is always
   *                              treated as a string, also for integer values.
   *                              </p>
   * @param int     $offset       [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string  $encoding
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    // INFO: the former "integer needle -> self::chr()" conversion was removed:
    // $needle is already a string at this point, so its strict integer
    // comparison could never be true

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalizeEncoding($encoding);
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): this branch is guarded by the "iconv" flag but calls
    // "\grapheme_strpos()" -> confirm that this pairing is intended
    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // last fallback: byte-wise search, converted back into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5437
5438
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * The encoding is passed through "self::normalizeEncoding()" and the search
   * is delegated to "\mb_strrchr()".
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack
   *                         this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end,
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use (default: UTF-8).
   *                         </p>
   *
   * @return string|false the portion of haystack,
   * or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    $normalizedEncoding = self::normalizeEncoding($encoding);

    return \mb_strrchr($haystack, $needle, $part, $normalizedEncoding);
  }
5473 15
5474 15
  /**
   * Reverses the order of the characters in a string.
   *
   * The string is broken into pieces with self::split(), the pieces are
   * reversed and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode($reversed);
  }
5485
5486
  /**
   * Case-insensitive variant of strrchr(): returns a portion of the haystack
   * relative to the last occurrence of the needle.
   *
   * Delegates to mb_strrichr() after normalizing the encoding name.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] <p>
   *                         If true, everything from the beginning of haystack up to the
   *                         last occurrence of needle is returned.
   *                         If false (default), everything from the last occurrence of
   *                         needle to the end of haystack is returned.
   *                         </p>
   * @param string $encoding [optional] Character encoding name; it is passed through
   *                         normalizeEncoding() before use.
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    return \mb_strrichr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5521
5522
  /**
   * Find the position of the last occurrence of a string, ignoring case.
   *
   * Both arguments are lower-cased with self::strtolower() and the search
   * itself is then delegated to self::strrpos().
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int|false The position of the last occurrence, or false if the needle is not found
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $haystackLower = self::strtolower($haystack);
    $needleLower = self::strtolower($needle);

    return self::strrpos($haystackLower, $needleLower, $offset);
  }
5535 1
5536
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  The string being checked for the last occurrence of needle.
   * @param string|int $needle    The string to find in haystack, or a (non-negative) code point
   *                              as int, which is converted with self::chr().
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number
   *                              of characters into the string. Negative values will stop searching
   *                              at an arbitrary point prior to the end of the string.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack, or false if needle is not found or either string is empty.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // an integer needle is interpreted as a code point
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): this branch is guarded by the "iconv" flag but calls a
    // grapheme_* function - confirm that this is the intended support flag.
    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: cut the haystack according to the offset, then search byte-wise

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // convert the byte position into a character position and re-add the
    // characters that were skipped by a positive offset
    $before = substr($haystack, 0, $bytePos);
    $skipped = $offset > 0 ? $offset : 0;

    return $skipped + self::strlen($before);
  }
5611
5612
  /**
   * Finds the length of the initial segment of a string consisting entirely of
   * characters contained within a given mask.
   *
   * When an offset or a non-default length is given, the string is first cut
   * with self::substr(); the segment is then matched with a regular expression
   * built from self::rxClass($mask).
   *
   * @param string $str    The string to examine.
   * @param string $mask   The allowed characters.
   * @param int    $offset Start position inside $str.
   * @param int    $length Maximum number of characters of $str to examine.
   *
   * @return int Length of the matching initial segment, 0 if there is none.
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $needsSlice = $offset || 2147483647 !== $length;
    if ($needsSlice) {
      $str = self::substr($str, $offset, $length);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';
    $matches = array();

    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5631
5632
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Delegates to grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      The input string. Must be valid UTF-8.
   * @param string $needle        The string to look for. Must be valid UTF-8.
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack before the first
   *                              occurrence of the needle (excluding the needle) is returned.
   *                              </p>
   *
   * @return string|false The portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5656
5657
  /**
   * Unicode transformation for case-less matching.
   *
   * Steps: replace the entries of self::$commonCaseFold, optionally apply the
   * "caseFolding_full" data set, clean the string and finally lower-case it.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str  The input string.
   * @param bool   $full Whether the full case-folding data should be applied as well.
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup tables are built / loaded once per request and then reused
    static $commonSearch = null;
    static $commonReplace = null;
    static $fullTable = null;

    if ($commonSearch === null) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonSearch, $commonReplace, $str);

    if ($full) {
      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      // presumably index 0 holds the search list and index 1 the replacements
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    return self::strtolower(self::clean($folded));
  }
5694 1
5695 1
  /**
   * Make a string lowercase.
   *
   * Delegates to mb_strtolower() after normalizing the encoding name.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      The string being lowercased.
   * @param string $encoding Character encoding name; it is passed through
   *                         normalizeEncoding() before use.
   *
   * @return string str with all alphabetic characters converted to lowercase
   *                (an empty string for empty input).
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to do for an empty string
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    return \mb_strtolower($str, self::normalizeEncoding($encoding));
  }
5722 4
5723
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (Unicode normalization form D) and every run of
   * non-spacing marks (\p{Mn}) is removed afterwards.
   *
   * @param string $s The input string.
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5734
5735
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when mbstring support is flagged; otherwise the
   * string is cleaned and converted with the table from self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      The string being uppercased.
   * @param string $encoding Character encoding name (only used by the mbstring path).
   *
   * @return string str with all alphabetic characters converted to uppercase
   *                (an empty string for empty input).
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // lookup lists for the fallback path, built once and then reused
    static $caseTableKeys = null;
    static $caseTableValues = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, self::normalizeEncoding($encoding));
    }

    // fallback

    if ($caseTableKeys === null) {
      // presumably maps lower-case characters to their upper-case form
      $caseTable = self::case_table();
      $caseTableKeys = array_keys($caseTable);
      $caseTableValues = array_values($caseTable);
    }

    return str_replace($caseTableKeys, $caseTableValues, self::clean($str));
  }
5780
5781
  /**
   * Translate characters or replace sub-strings.
   *
   * With three arguments, $from and $to are split into characters with
   * self::str_split(), trimmed to the same number of characters and turned
   * into a replacement map. With two arguments, $from is handed to the
   * native strtr() unchanged.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  The string being translated.
   * @param string|array $from The characters to replace, or - when $to is omitted -
   *                           the replacement map passed on to strtr().
   * @param string|array $to   The replacement characters.
   *
   * @return string A copy of str with all occurrences of each character in
   *                from translated to the corresponding character in to.
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF === $to) {
      // NOTE(review): the native two-argument strtr() expects an array of
      // replace pairs here; a string $from is not handled - confirm callers.
      return strtr($str, $from);
    }

    $from = self::str_split($from);
    $to = self::str_split($to);

    $countFrom = count($from);
    $countTo = count($to);

    // cut the longer list down so both sides have the same number of entries
    $pairs = min($countFrom, $countTo);
    if ($countFrom !== $pairs) {
      $from = array_slice($from, 0, $pairs);
    }
    if ($countTo !== $pairs) {
      $to = array_slice($to, 0, $pairs);
    }

    return strtr($str, array_combine($from, $to));
  }
5822 6
5823
  /**
   * Return the width of a string.
   *
   * Delegates to mb_strwidth() using the UTF-8 encoding.
   *
   * @param string $s The input string.
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5837
5838 3
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       The string being checked.
   * @param int     $start     The first position used in str.
   * @param int     $length    [optional] The maximum length of the returned string;
   *                           null means "up to the length of str".
   * @param string  $encoding  Character encoding name (only used by the mbstring path);
   *                           a boolean value is treated as 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string|false The portion of str specified by the start and length
   *                      parameters, an empty string for empty input, or false
   *                      when start is greater than the length of str.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed for the range check below and as
    // the implicit length when none was given
    $str_length = ($start || $length === null) ? (int)self::strlen($str) : 0;

    if ($start && $start > $str_length) {
      return false;
    }

    $length = ($length === null) ? $str_length : (int)$length;

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

      return \mb_substr($str, $start, $length, $encoding);
    }

    // NOTE(review): guarded by the "iconv" flag but calls a grapheme_*
    // function - confirm that this is the intended support flag.
    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback

    // split to array, extract the relevant part and join to make a string again
    $chars = self::split($str);

    return implode(array_slice($chars, $start, $length));
  }
5915
5916
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The relevant part of $main_str is extracted first, $str is shortened to the
   * same number of characters and both parts are then compared with
   * self::strcmp() or self::strcasecmp().
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
   *                                    end of the string.
   * @param int     $length             The length of the comparison. The default value is the largest of the length of
   *                                    the str compared to the length of main_str less the offset.
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // self::substr() returns false when the start position lies behind the
    // end of the string; normalize that to an empty string so that only
    // strings are handed to the helpers below
    $main_str = (string)self::substr($main_str, $offset, $length);
    $str = (string)self::substr($str, 0, self::strlen($main_str));

    if ($case_insensitivity === true) {
      return self::strcasecmp($main_str, $str);
    }

    return self::strcmp($main_str, $str);
  }
5936
5937 2
  /**
   * Count the number of substring occurrences
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack The string to search in
   * @param string $needle   The substring to search for
   * @param int    $offset   [optional] The offset where to start counting
   * @param int    $length   [optional] <p>
   *                         The maximum length after the specified offset to search for the
   *                         substring. When omitted (null), the search runs from the offset
   *                         to the end of the haystack.
   *                         </p>
   *
   * @return int|false The number of occurrences, or false if haystack or needle
   *                   is empty or if offset plus length is not positive.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      // keep an omitted length as null: casting it to 0 would cut the
      // haystack down to an empty string and always yield a count of 0
      if ($length !== null) {
        $length = (int)$length;
      }

      if ((int)$length + $offset <= 0) {
        return false;
      }

      // self::substr() returns false for an offset behind the end of the
      // string, which is treated as an empty haystack here
      $haystack = (string)self::substr($haystack, $offset, $length);
    }

    self::checkForSupport();

    return \mb_substr_count($haystack, $needle);
  }
5985 3
5986 6
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, every element is processed on its own (the other arguments are
   * broadcast or truncated to the number of elements) and an array is returned.
   * If $str is a string and $replacement is an array, only its element at index 0 is used.
   *
   * @param string|array   $str
   * @param string|array   $replacement
   * @param int|array      $start
   * @param null|int|array $length For string input, null means "up to the end of the string".
   *
   * @return array|string
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          // non-integer offsets fall back to 0
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        // NOTE(review): for array input an omitted length becomes 0 (pure insertion), while
        // for string input it means "up to the end of the string" - confirm this is intended.
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            // non-integer lengths fall back to the number of input strings
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // string input: only the first replacement can be used
    if (is_array($replacement)) {
      $replacement = isset($replacement[0]) ? $replacement[0] : '';
    }

    $str = (string)$str;

    // split both strings into arrays of UTF-8 characters
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      self::checkForSupport();

      $length = \mb_strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // glue first: "implode($array, $glue)" is the legacy argument order
    return implode('', $smatches[0]);
  }
6063
6064
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change the character,
   * its lower-case form is used instead.
   *
   * @param string $str
   * @param string $encoding
   *
   * @return string each character's case swapped
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoding = self::normalizeEncoding($encoding);
    $str = self::clean($str);

    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          // unchanged by strtoupper => already upper-case (or caseless), so lower it
          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6099
6100
  /**
   * Alias of "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr The character that is passed on as replacement for unknown characters
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
6114
6115
  /**
   * Alias of "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
6128 7
6129 7
  /**
   * Alias of "UTF8::to_utf8()".
   *
   * @see UTF8::to_utf8()
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
6142
6143 1
  /**
   * Convert a string to ASCII.
   *
   * If the "intl" support flag is set (and PHP >= 5.4), the string is transliterated first;
   * every character that is still non-ASCII afterwards is looked up in the per-bank mapping
   * files below "data/" and replaced by $unknown if there is no mapping.
   *
   * @param string $str     The input string.
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?')
  {
    // cache of the already loaded mapping banks, indexed by the high bits of the code point
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    self::checkForSupport();
    if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
      $transliterated = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

      // keep the un-transliterated input if the transliterator did not return a string
      if (is_string($transliterated)) {
        $str = $transliterated;
      }

      // check again, if we only have ASCII, now ...
      if (!preg_match("/[\x80-\xFF]/", $str)) {
        return $str;
      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // the lead byte tells how many trailing bytes belong to the character
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $base = 192;
        $trailing = 1;
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $base = 224;
        $trailing = 2;
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $base = 240;
        $trailing = 3;
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $base = 248;
        $trailing = 4;
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $base = 252;
        $trailing = 5;
      } else {
        // 128 - 191 (no lead byte) or 254 - 255: there is nothing to decode
        $c = $unknown;
        continue;
      }

      // truncated sequence
      if (strlen($c) <= $trailing) {
        $c = $unknown;
        continue;
      }

      // The code point is computed from scratch for every character, so that a value
      // from a previous iteration can never be reused by accident.
      $ord = $ordC0 - $base;
      for ($j = 1; $j <= $trailing; ++$j) {
        $ord = $ord * 64 + (ord($c[$j]) - 128);
      }

      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // presumably the bank file fills $UTF8_TO_ASCII[$bank] - verify against "data/"
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        // no file, or the file did not define this bank: remember it as empty
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    unset($c);

    return implode('', $chars);
  }
6254
6255
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @see UTF8::to_win1252()
   *
   * @param string|array $str
   *
   * @return array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6268
6269
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @see UTF8::to_win1252()
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6282
6283
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte-wise conversion, "\uXXXX" escape sequences and decimal HTML entities
   * with 2 to 4 digits are decoded as well.
   *
   * @param string|array $str Any string or array.
   *
   * @return string|array The same string (or array, converted element by element), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 0x00 - 0x7F: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      // the single byte written as a 2-byte UTF-8 sequence (integer math only, no float for chr())
      $asTwoBytes = chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));

      // 0x80 - 0xBF: a continuation byte without a lead byte - needs conversion
      if ($ordC1 < 0xC0) {
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= $asTwoBytes;
        }
        continue;
      }

      // 0xC0 - 0xFF: should be converted to UTF8, if it's not UTF8 already
      if ($ordC1 <= 0xDF) { // looks like 2 bytes UTF8
        $trailing = 1;
      } elseif ($ordC1 <= 0xEF) { // looks like 3 bytes UTF8
        $trailing = 2;
      } elseif ($ordC1 <= 0xF7) { // looks like 4 bytes UTF8
        $trailing = 3;
      } else { // doesn't look like UTF8, but should be converted
        $trailing = 0;
      }

      // all expected trailing bytes must exist and must be in the range 0x80 - 0xBF
      $isUtf8 = $trailing > 0 && $i + $trailing < $max;
      for ($j = 1; $isUtf8 && $j <= $trailing; $j++) {
        $isUtf8 = (ord($str[$i + $j]) & 0xC0) === 0x80;
      }

      if ($isUtf8) { // yeah, almost sure it's UTF8 already
        $buf .= substr($str, $i, $trailing + 1);
        $i += $trailing;
      } else { // not valid UTF8 - convert it
        $buf .= $asTwoBytes;
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6410
6411
  /**
   * Convert a string into "win1252"-encoding.
   *
   * Arrays are converted element by element (keys are kept); everything else is cast to
   * string and passed to "UTF8::utf8_decode()".
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  protected static function to_win1252($str)
  {
    if (is_array($str)) {
      $converted = array();
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_win1252($value);
      }

      return $converted;
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
6438
6439
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; if omitted (or empty), characters
   *                         of the unicode categories "Z" (separator) and "C" (other) are stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // The string '0' is falsy in PHP but it is a valid character list, so it must not
    // be handled like "no characters given".
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6467
6468
  /**
   * Makes string's first char uppercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // the casts make sure that a non-string result of "substr()" (e.g. false)
    // is never passed on
    $firstChar = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtoupper($firstChar) . $rest;
  }
6479 5
6480 5
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string $word
   *
   * @return string
   */
  public static function ucword($word)
  {
    $result = self::ucfirst($word);

    return $result;
  }
6493
6494
  /**
   * Uppercase for all words in the string.
   *
   * The string is split at single spaces; every word that is not listed in $exceptions gets an
   * upper-case first character. The first character of the whole result is always upper-cased.
   *
   * @param  string $str
   * @param array   $exceptions Words that are kept as they are (strict comparison).
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // same guard as in the other methods: only a really empty string is "empty", not '0'
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (explode(' ', $str) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
6535 1
6536
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // url-decode first, then rewrite "%uXXXX" sequences as hexadecimal html entities
    $str = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', urldecode($str));

    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // to UTF-8 -> decode html entities -> raw url-decode -> fix simple UTF-8 problems
    $utf8 = self::to_utf8($str);
    $entityDecoded = self::html_entity_decode($utf8, $flags);
    $rawDecoded = rawurldecode($entityDecoded);
    $fixed = self::fix_simple_utf8($rawDecoded);

    return (string)$fixed;
  }
6576 4
6577 4
  /**
6578
   * Return a array with "urlencoded"-win1252 -> UTF-8
6579 4
   *
6580
   * @return mixed
6581
   */
6582
  public static function urldecode_fix_win1252_chars()
6583
  {
6584 4
    static $array = array(
6585
        '%20' => ' ',
6586 4
        '%21' => '!',
6587
        '%22' => '"',
6588
        '%23' => '#',
6589
        '%24' => '$',
6590
        '%25' => '%',
6591 4
        '%26' => '&',
6592 4
        '%27' => "'",
6593
        '%28' => '(',
6594 4
        '%29' => ')',
6595 4
        '%2A' => '*',
6596 4
        '%2B' => '+',
6597 4
        '%2C' => ',',
6598 4
        '%2D' => '-',
6599
        '%2E' => '.',
6600 4
        '%2F' => '/',
6601 4
        '%30' => '0',
6602 4
        '%31' => '1',
6603 4
        '%32' => '2',
6604
        '%33' => '3',
6605 4
        '%34' => '4',
6606 3
        '%35' => '5',
6607 3
        '%36' => '6',
6608 3
        '%37' => '7',
6609 3
        '%38' => '8',
6610
        '%39' => '9',
6611 3
        '%3A' => ':',
6612
        '%3B' => ';',
6613
        '%3C' => '<',
6614
        '%3D' => '=',
6615 3
        '%3E' => '>',
6616 3
        '%3F' => '?',
6617
        '%40' => '@',
6618 4
        '%41' => 'A',
6619
        '%42' => 'B',
6620
        '%43' => 'C',
6621
        '%44' => 'D',
6622
        '%45' => 'E',
6623
        '%46' => 'F',
6624
        '%47' => 'G',
6625
        '%48' => 'H',
6626
        '%49' => 'I',
6627
        '%4A' => 'J',
6628
        '%4B' => 'K',
6629
        '%4C' => 'L',
6630
        '%4D' => 'M',
6631
        '%4E' => 'N',
6632
        '%4F' => 'O',
6633
        '%50' => 'P',
6634
        '%51' => 'Q',
6635
        '%52' => 'R',
6636
        '%53' => 'S',
6637
        '%54' => 'T',
6638
        '%55' => 'U',
6639
        '%56' => 'V',
6640
        '%57' => 'W',
6641
        '%58' => 'X',
6642
        '%59' => 'Y',
6643
        '%5A' => 'Z',
6644
        '%5B' => '[',
6645
        '%5C' => '\\',
6646
        '%5D' => ']',
6647
        '%5E' => '^',
6648
        '%5F' => '_',
6649
        '%60' => '`',
6650
        '%61' => 'a',
6651
        '%62' => 'b',
6652
        '%63' => 'c',
6653
        '%64' => 'd',
6654
        '%65' => 'e',
6655
        '%66' => 'f',
6656
        '%67' => 'g',
6657
        '%68' => 'h',
6658
        '%69' => 'i',
6659
        '%6A' => 'j',
6660
        '%6B' => 'k',
6661
        '%6C' => 'l',
6662
        '%6D' => 'm',
6663
        '%6E' => 'n',
6664
        '%6F' => 'o',
6665
        '%70' => 'p',
6666
        '%71' => 'q',
6667
        '%72' => 'r',
6668
        '%73' => 's',
6669
        '%74' => 't',
6670
        '%75' => 'u',
6671
        '%76' => 'v',
6672
        '%77' => 'w',
6673
        '%78' => 'x',
6674
        '%79' => 'y',
6675
        '%7A' => 'z',
6676
        '%7B' => '{',
6677
        '%7C' => '|',
6678
        '%7D' => '}',
6679
        '%7E' => '~',
6680
        '%7F' => '',
6681
        '%80' => '`',
6682
        '%81' => '',
6683
        '%82' => '‚',
6684
        '%83' => 'ƒ',
6685
        '%84' => '„',
6686
        '%85' => '…',
6687
        '%86' => '†',
6688
        '%87' => '‡',
6689
        '%88' => 'ˆ',
6690
        '%89' => '‰',
6691
        '%8A' => 'Š',
6692
        '%8B' => '‹',
6693
        '%8C' => 'Œ',
6694
        '%8D' => '',
6695
        '%8E' => 'Ž',
6696
        '%8F' => '',
6697
        '%90' => '',
6698
        '%91' => '‘',
6699
        '%92' => '’',
6700
        '%93' => '“',
6701
        '%94' => '”',
6702
        '%95' => '•',
6703
        '%96' => '–',
6704
        '%97' => '—',
6705
        '%98' => '˜',
6706
        '%99' => '™',
6707
        '%9A' => 'š',
6708
        '%9B' => '›',
6709
        '%9C' => 'œ',
6710
        '%9D' => '',
6711
        '%9E' => 'ž',
6712
        '%9F' => 'Ÿ',
6713
        '%A0' => '',
6714
        '%A1' => '¡',
6715
        '%A2' => '¢',
6716
        '%A3' => '£',
6717
        '%A4' => '¤',
6718
        '%A5' => '¥',
6719
        '%A6' => '¦',
6720
        '%A7' => '§',
6721
        '%A8' => '¨',
6722
        '%A9' => '©',
6723
        '%AA' => 'ª',
6724
        '%AB' => '«',
6725
        '%AC' => '¬',
6726
        '%AD' => '',
6727
        '%AE' => '®',
6728
        '%AF' => '¯',
6729
        '%B0' => '°',
6730
        '%B1' => '±',
6731
        '%B2' => '²',
6732
        '%B3' => '³',
6733
        '%B4' => '´',
6734
        '%B5' => 'µ',
6735
        '%B6' => '¶',
6736
        '%B7' => '·',
6737
        '%B8' => '¸',
6738
        '%B9' => '¹',
6739
        '%BA' => 'º',
6740
        '%BB' => '»',
6741
        '%BC' => '¼',
6742
        '%BD' => '½',
6743
        '%BE' => '¾',
6744
        '%BF' => '¿',
6745
        '%C0' => 'À',
6746
        '%C1' => 'Á',
6747
        '%C2' => 'Â',
6748
        '%C3' => 'Ã',
6749
        '%C4' => 'Ä',
6750
        '%C5' => 'Å',
6751
        '%C6' => 'Æ',
6752
        '%C7' => 'Ç',
6753
        '%C8' => 'È',
6754
        '%C9' => 'É',
6755
        '%CA' => 'Ê',
6756
        '%CB' => 'Ë',
6757
        '%CC' => 'Ì',
6758
        '%CD' => 'Í',
6759
        '%CE' => 'Î',
6760
        '%CF' => 'Ï',
6761
        '%D0' => 'Ð',
6762
        '%D1' => 'Ñ',
6763
        '%D2' => 'Ò',
6764
        '%D3' => 'Ó',
6765
        '%D4' => 'Ô',
6766
        '%D5' => 'Õ',
6767
        '%D6' => 'Ö',
6768
        '%D7' => '×',
6769
        '%D8' => 'Ø',
6770
        '%D9' => 'Ù',
6771
        '%DA' => 'Ú',
6772
        '%DB' => 'Û',
6773
        '%DC' => 'Ü',
6774
        '%DD' => 'Ý',
6775
        '%DE' => 'Þ',
6776
        '%DF' => 'ß',
6777
        '%E0' => 'à',
6778
        '%E1' => 'á',
6779
        '%E2' => 'â',
6780
        '%E3' => 'ã',
6781
        '%E4' => 'ä',
6782
        '%E5' => 'å',
6783
        '%E6' => 'æ',
6784
        '%E7' => 'ç',
6785
        '%E8' => 'è',
6786
        '%E9' => 'é',
6787
        '%EA' => 'ê',
6788
        '%EB' => 'ë',
6789
        '%EC' => 'ì',
6790
        '%ED' => 'í',
6791
        '%EE' => 'î',
6792
        '%EF' => 'ï',
6793
        '%F0' => 'ð',
6794
        '%F1' => 'ñ',
6795
        '%F2' => 'ò',
6796
        '%F3' => 'ó',
6797
        '%F4' => 'ô',
6798
        '%F5' => 'õ',
6799
        '%F6' => 'ö',
6800
        '%F7' => '÷',
6801
        '%F8' => 'ø',
6802
        '%F9' => 'ù',
6803
        '%FA' => 'ú',
6804
        '%FB' => 'û',
6805
        '%FC' => 'ü',
6806
        '%FD' => 'ý',
6807
        '%FE' => 'þ',
6808
        '%FF' => 'ÿ',
6809
    );
6810
6811
    return $array;
6812
  }
6813
6814
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is passed through "UTF8::to_utf8()", then the sequences listed in
   * "self::$utf8ToWin1252" are replaced, and the result is decoded via "Xml::utf8_decode()".
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // the replacement table is split only once per request
    static $utf8ToWin1252Keys = null;
    static $utf8ToWin1252Values = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($str);

    if ($utf8ToWin1252Keys === null) {
      $utf8ToWin1252Keys = array_keys(self::$utf8ToWin1252);
      $utf8ToWin1252Values = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($utf8ToWin1252Keys, $utf8ToWin1252Values, $utf8);

    return Xml::utf8_decode($mapped);
  }
6844
6845
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * If the encoded string contains the byte "\xC2", the sequences listed in
   * "self::$cp1252ToUtf8" are replaced afterwards.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // the replacement table is split only once per request
    static $cp1252ToUtf8Keys = null;
    static $cp1252ToUtf8Values = null;

    $encoded = \utf8_encode($str);

    // without a "\xC2" byte there is nothing to replace
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($cp1252ToUtf8Keys === null) {
      $cp1252ToUtf8Keys = array_keys(self::$cp1252ToUtf8);
      $cp1252ToUtf8Values = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($cp1252ToUtf8Keys, $cp1252ToUtf8Values, $encoded);
  }
6871
6872
  /**
   * fix -> utf8-win1252 chars
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to "UTF8::fix_simple_utf8()".
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6889
6890
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6904
6905
  /**
   * Limit the number of words in a string.
   *
   * @param  string $str
   * @param  int    $words    The maximum number of words; values below 1 result in an empty string.
   * @param  string $strAddOn Appended to the result, if the string was shortened.
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $words = (int)$words;

    if ($words < 1) {
      return '';
    }

    // leading whitespace, followed by up to "$words" runs of non-whitespace (+ trailing whitespace)
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    // shorten only if something matched and the match does not cover the whole string
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
6940
6941
  /**
   * Wraps a string to a given number of characters (multi-byte aware).
   *
   * The string is translated into a single-byte "mask" (one byte per character:
   * ' ' for a space, '?' for anything else, '#' for an existing $break), the
   * native wordwrap() is run on that mask, and the resulting break positions
   * are mapped back onto the original characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>
   *                      The input string.
   *                      </p>
   * @param int    $width [optional] <p>
   *                      The column width, counted in characters.
   *                      </p>
   * @param string $break [optional] <p>
   *                      The line is broken using the optional
   *                      break parameter.
   *                      </p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string the given string wrapped at the specified column;
   *                an empty string if $str or $break is empty.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $chars holds the real characters (an existing $break counts as ONE entry),
    // $mask holds exactly one ASCII byte per entry of $chars.
    $chars = array();
    $mask = '';

    foreach (explode($break, $str) as $lineIndex => $line) {

      if ($lineIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($line) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;
    $mask = wordwrap($mask, $width, '#', $cut);

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
      // copy every character that sits before this break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // A break either replaced a space / an existing break (drop that character)
      // or was inserted by $cut (nothing to drop). With $cut enabled the native
      // function may also report a break when no character is left, e.g. for
      // input that already ends with $break, hence the isset() guard.
      if (
          isset($chars[$charIndex])
          &&
          ($break === $chars[$charIndex] || ' ' === $chars[$charIndex])
      ) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever follows the last break is appended unchanged
    return $wrapped . implode('', $chars);
  }
7022
7023
  /**
   * Get the Unicode White Space characters kept by this class.
   *
   * @return array numeric code point => White Space character
   */
  public static function ws()
  {
    // Arrays are returned by value, so callers cannot modify the class-level list.
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7032
7033
}
7034