Completed
Push — master ( 4e95be...c9c63e )
by Lars
03:29
created

UTF8::case_table()   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 1001
Code Lines 994

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 1001
ccs 0
cts 3
cp 0
rs 8.2857
cc 1
eloc 994
nc 1
nop 0
crap 2

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: moving a cohesive part of the body into its own well-named method.

1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Intl\Normalizer\Normalizer;
7
use Symfony\Polyfill\Xml\Xml;
8
9
/**
10
 * UTF8-Helper-Class
11
 *
12
 * @package voku\helper
13
 */
14
class UTF8
15
{
16
  /**
17
   * @var array
18
   */
19
  protected static $win1252ToUtf8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
  );
48
49
  /**
50
   * @var array
51
   */
52
  protected static $cp1252ToUtf8 = array(
53
      '€' => '€',
54
      '‚' => '‚',
55
      'ƒ' => 'ƒ',
56
      '„' => '„',
57
      '…' => '…',
58
      '†' => '†',
59
      '‡' => '‡',
60
      'ˆ' => 'ˆ',
61
      '‰' => '‰',
62
      'Š' => 'Š',
63
      '‹' => '‹',
64
      'Œ' => 'Œ',
65
      'Ž' => 'Ž',
66
      '‘' => '‘',
67
      '’' => '’',
68
      '“' => '“',
69
      '”' => '”',
70
      '•' => '•',
71
      '–' => '–',
72
      '—' => '—',
73
      '˜' => '˜',
74
      '™' => '™',
75
      'š' => 'š',
76
      '›' => '›',
77
      'œ' => 'œ',
78
      'ž' => 'ž',
79
      'Ÿ' => 'Ÿ',
80
  );
81
82
  /**
83
   * Numeric code point => UTF-8 Character
84
   *
85
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
86
   *
87
   * @var array
88
   */
89
  protected static $whitespace = array(
90
    // NUL Byte
91
    0     => "\x0",
92
    // Tab
93
    9     => "\x9",
94
    // New Line
95
    10    => "\xa",
96
    // Vertical Tab
97
    11    => "\xb",
98
    // Carriage Return
99
    13    => "\xd",
100
    // Ordinary Space
101
    32    => "\x20",
102
    // NO-BREAK SPACE
103
    160   => "\xc2\xa0",
104
    // OGHAM SPACE MARK
105
    5760  => "\xe1\x9a\x80",
106
    // MONGOLIAN VOWEL SEPARATOR
107
    6158  => "\xe1\xa0\x8e",
108
    // EN QUAD
109
    8192  => "\xe2\x80\x80",
110
    // EM QUAD
111
    8193  => "\xe2\x80\x81",
112
    // EN SPACE
113
    8194  => "\xe2\x80\x82",
114
    // EM SPACE
115
    8195  => "\xe2\x80\x83",
116
    // THREE-PER-EM SPACE
117
    8196  => "\xe2\x80\x84",
118
    // FOUR-PER-EM SPACE
119
    8197  => "\xe2\x80\x85",
120
    // SIX-PER-EM SPACE
121
    8198  => "\xe2\x80\x86",
122
    // FIGURE SPACE
123
    8199  => "\xe2\x80\x87",
124
    // PUNCTUATION SPACE
125
    8200  => "\xe2\x80\x88",
126
    // THIN SPACE
127
    8201  => "\xe2\x80\x89",
128
    // HAIR SPACE
129
    8202  => "\xe2\x80\x8a",
130
    // LINE SEPARATOR
131
    8232  => "\xe2\x80\xa8",
132
    // PARAGRAPH SEPARATOR
133
    8233  => "\xe2\x80\xa9",
134
    // NARROW NO-BREAK SPACE
135
    8239  => "\xe2\x80\xaf",
136
    // MEDIUM MATHEMATICAL SPACE
137
    8287  => "\xe2\x81\x9f",
138
    // IDEOGRAPHIC SPACE
139
    12288 => "\xe3\x80\x80",
140
  );
141
142
  /**
143
   * @var array
144
   */
145
  protected static $whitespaceTable = array(
146
      'SPACE'                     => "\x20",
147
      'NO-BREAK SPACE'            => "\xc2\xa0",
148
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
149
      'EN QUAD'                   => "\xe2\x80\x80",
150
      'EM QUAD'                   => "\xe2\x80\x81",
151
      'EN SPACE'                  => "\xe2\x80\x82",
152
      'EM SPACE'                  => "\xe2\x80\x83",
153
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
154
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
155
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
156
      'FIGURE SPACE'              => "\xe2\x80\x87",
157
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
158
      'THIN SPACE'                => "\xe2\x80\x89",
159
      'HAIR SPACE'                => "\xe2\x80\x8a",
160
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
161
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
162
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
163
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
164
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
165
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
166
  );
167
168
  /**
169
   * bidirectional text chars
170
   *
171
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
172
   *
173
   * @var array
174
   */
175
  protected static $bidiUniCodeControlsTable = array(
176
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
177
    8234 => "\xE2\x80\xAA",
178
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
179
    8235 => "\xE2\x80\xAB",
180
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
181
    8236 => "\xE2\x80\xAC",
182
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
183
    8237 => "\xE2\x80\xAD",
184
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
185
    8238 => "\xE2\x80\xAE",
186
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
187
    8294 => "\xE2\x81\xA6",
188
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
189
    8295 => "\xE2\x81\xA7",
190
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
191
    8296 => "\xE2\x81\xA8",
192
    // POP DIRECTIONAL ISOLATE
193
    8297 => "\xE2\x81\xA9",
194
  );
195
196
  /**
197
   * @var array
198
   */
199
  protected static $commonCaseFold = array(
200
      'ſ'            => 's',
201
      "\xCD\x85"     => 'ι',
202
      'ς'            => 'σ',
203
      "\xCF\x90"     => 'β',
204
      "\xCF\x91"     => 'θ',
205
      "\xCF\x95"     => 'φ',
206
      "\xCF\x96"     => 'π',
207
      "\xCF\xB0"     => 'κ',
208
      "\xCF\xB1"     => 'ρ',
209
      "\xCF\xB5"     => 'ε',
210
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
211
      "\xE1\xBE\xBE" => 'ι',
212
  );
213
214
  /**
215
   * @var array
216
   */
217
  protected static $brokenUtf8ToUtf8 = array(
218
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
219
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
220
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
221
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
222
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
223
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
224
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
225
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
226
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
227
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
228
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
229
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
230
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
231
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
232
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
233
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
234
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
235
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
236
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
237
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
238
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
239
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
240
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
241
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
242
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
243
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
244
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
245
      'ü'       => 'ü',
246
      'ä'       => 'ä',
247
      'ö'       => 'ö',
248
      'Ö'       => 'Ö',
249
      'ß'       => 'ß',
250
      'Ã '       => 'à',
251
      'á'       => 'á',
252
      'â'       => 'â',
253
      'ã'       => 'ã',
254
      'ù'       => 'ù',
255
      'ú'       => 'ú',
256
      'û'       => 'û',
257
      'Ù'       => 'Ù',
258
      'Ú'       => 'Ú',
259
      'Û'       => 'Û',
260
      'Ü'       => 'Ü',
261
      'ò'       => 'ò',
262
      'ó'       => 'ó',
263
      'ô'       => 'ô',
264
      'è'       => 'è',
265
      'é'       => 'é',
266
      'ê'       => 'ê',
267
      'ë'       => 'ë',
268
      'À'       => 'À',
269
      'Á'       => 'Á',
270
      'Â'       => 'Â',
271
      'Ã'       => 'Ã',
272
      'Ä'       => 'Ä',
273
      'Ã…'       => 'Å',
274
      'Ç'       => 'Ç',
275
      'È'       => 'È',
276
      'É'       => 'É',
277
      'Ê'       => 'Ê',
278
      'Ë'       => 'Ë',
279
      'ÃŒ'       => 'Ì',
280
      'Í'       => 'Í',
281
      'ÃŽ'       => 'Î',
282
      'Ï'       => 'Ï',
283
      'Ñ'       => 'Ñ',
284
      'Ã’'       => 'Ò',
285
      'Ó'       => 'Ó',
286
      'Ô'       => 'Ô',
287
      'Õ'       => 'Õ',
288
      'Ø'       => 'Ø',
289
      'Ã¥'       => 'å',
290
      'æ'       => 'æ',
291
      'ç'       => 'ç',
292
      'ì'       => 'ì',
293
      'í'       => 'í',
294
      'î'       => 'î',
295
      'ï'       => 'ï',
296
      'ð'       => 'ð',
297
      'ñ'       => 'ñ',
298
      'õ'       => 'õ',
299
      'ø'       => 'ø',
300
      'ý'       => 'ý',
301
      'ÿ'       => 'ÿ',
302
      '€'      => '€',
303
  );
304
305
  /**
306
   * @var array
307
   */
308
  protected static $utf8ToWin1252 = array(
309
      "\xe2\x82\xac" => "\x80", // EURO SIGN
310
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
311
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
312
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
313
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
314
      "\xe2\x80\xa0" => "\x86", // DAGGER
315
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
316
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
317
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
318
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
319
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
320
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
321
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
322
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
323
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
324
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
325
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
326
      "\xe2\x80\xa2" => "\x95", // BULLET
327
      "\xe2\x80\x93" => "\x96", // EN DASH
328
      "\xe2\x80\x94" => "\x97", // EM DASH
329
      "\xcb\x9c"     => "\x98", // SMALL TILDE
330
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
331
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
332
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
333
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
334
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
335
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
336
  );
337
338
  /**
339
   * @var array
340
   */
341
  protected static $utf8MSWord = array(
342
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
343
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
344
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
345
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
346
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
347
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
348
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
349
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
350
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
351
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
352
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
353
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
354
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
355
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
356
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
357
  );
358
359
  protected static $iconvEncoding = array(
360
      'ANSI_X3.4-1968',
361
      'ANSI_X3.4-1986',
362
      'ASCII',
363
      'CP367',
364
      'IBM367',
365
      'ISO-IR-6',
366
      'ISO646-US',
367
      'ISO_646.IRV:1991',
368
      'US',
369
      'US-ASCII',
370
      'CSASCII',
371
      'UTF-8',
372
      'ISO-10646-UCS-2',
373
      'UCS-2',
374
      'CSUNICODE',
375
      'UCS-2BE',
376
      'UNICODE-1-1',
377
      'UNICODEBIG',
378
      'CSUNICODE11',
379
      'UCS-2LE',
380
      'UNICODELITTLE',
381
      'ISO-10646-UCS-4',
382
      'UCS-4',
383
      'CSUCS4',
384
      'UCS-4BE',
385
      'UCS-4LE',
386
      'UTF-16',
387
      'UTF-16BE',
388
      'UTF-16LE',
389
      'UTF-32',
390
      'UTF-32BE',
391
      'UTF-32LE',
392
      'UNICODE-1-1-UTF-7',
393
      'UTF-7',
394
      'CSUNICODE11UTF7',
395
      'UCS-2-INTERNAL',
396
      'UCS-2-SWAPPED',
397
      'UCS-4-INTERNAL',
398
      'UCS-4-SWAPPED',
399
      'C99',
400
      'JAVA',
401
      'CP819',
402
      'IBM819',
403
      'ISO-8859-1',
404
      'ISO-IR-100',
405
      'ISO8859-1',
406
      'ISO_8859-1',
407
      'ISO_8859-1:1987',
408
      'L1',
409
      'LATIN1',
410
      'CSISOLATIN1',
411
      'ISO-8859-2',
412
      'ISO-IR-101',
413
      'ISO8859-2',
414
      'ISO_8859-2',
415
      'ISO_8859-2:1987',
416
      'L2',
417
      'LATIN2',
418
      'CSISOLATIN2',
419
      'ISO-8859-3',
420
      'ISO-IR-109',
421
      'ISO8859-3',
422
      'ISO_8859-3',
423
      'ISO_8859-3:1988',
424
      'L3',
425
      'LATIN3',
426
      'CSISOLATIN3',
427
      'ISO-8859-4',
428
      'ISO-IR-110',
429
      'ISO8859-4',
430
      'ISO_8859-4',
431
      'ISO_8859-4:1988',
432
      'L4',
433
      'LATIN4',
434
      'CSISOLATIN4',
435
      'CYRILLIC',
436
      'ISO-8859-5',
437
      'ISO-IR-144',
438
      'ISO8859-5',
439
      'ISO_8859-5',
440
      'ISO_8859-5:1988',
441
      'CSISOLATINCYRILLIC',
442
      'ARABIC',
443
      'ASMO-708',
444
      'ECMA-114',
445
      'ISO-8859-6',
446
      'ISO-IR-127',
447
      'ISO8859-6',
448
      'ISO_8859-6',
449
      'ISO_8859-6:1987',
450
      'CSISOLATINARABIC',
451
      'ECMA-118',
452
      'ELOT_928',
453
      'GREEK',
454
      'GREEK8',
455
      'ISO-8859-7',
456
      'ISO-IR-126',
457
      'ISO8859-7',
458
      'ISO_8859-7',
459
      'ISO_8859-7:1987',
460
      'ISO_8859-7:2003',
461
      'CSISOLATINGREEK',
462
      'HEBREW',
463
      'ISO-8859-8',
464
      'ISO-IR-138',
465
      'ISO8859-8',
466
      'ISO_8859-8',
467
      'ISO_8859-8:1988',
468
      'CSISOLATINHEBREW',
469
      'ISO-8859-9',
470
      'ISO-IR-148',
471
      'ISO8859-9',
472
      'ISO_8859-9',
473
      'ISO_8859-9:1989',
474
      'L5',
475
      'LATIN5',
476
      'CSISOLATIN5',
477
      'ISO-8859-10',
478
      'ISO-IR-157',
479
      'ISO8859-10',
480
      'ISO_8859-10',
481
      'ISO_8859-10:1992',
482
      'L6',
483
      'LATIN6',
484
      'CSISOLATIN6',
485
      'ISO-8859-11',
486
      'ISO8859-11',
487
      'ISO_8859-11',
488
      'ISO-8859-13',
489
      'ISO-IR-179',
490
      'ISO8859-13',
491
      'ISO_8859-13',
492
      'L7',
493
      'LATIN7',
494
      'ISO-8859-14',
495
      'ISO-CELTIC',
496
      'ISO-IR-199',
497
      'ISO8859-14',
498
      'ISO_8859-14',
499
      'ISO_8859-14:1998',
500
      'L8',
501
      'LATIN8',
502
      'ISO-8859-15',
503
      'ISO-IR-203',
504
      'ISO8859-15',
505
      'ISO_8859-15',
506
      'ISO_8859-15:1998',
507
      'LATIN-9',
508
      'ISO-8859-16',
509
      'ISO-IR-226',
510
      'ISO8859-16',
511
      'ISO_8859-16',
512
      'ISO_8859-16:2001',
513
      'L10',
514
      'LATIN10',
515
      'KOI8-R',
516
      'CSKOI8R',
517
      'KOI8-U',
518
      'KOI8-RU',
519
      'CP1250',
520
      'MS-EE',
521
      'WINDOWS-1250',
522
      'CP1251',
523
      'MS-CYRL',
524
      'WINDOWS-1251',
525
      'CP1252',
526
      'MS-ANSI',
527
      'WINDOWS-1252',
528
      'CP1253',
529
      'MS-GREEK',
530
      'WINDOWS-1253',
531
      'CP1254',
532
      'MS-TURK',
533
      'WINDOWS-1254',
534
      'CP1255',
535
      'MS-HEBR',
536
      'WINDOWS-1255',
537
      'CP1256',
538
      'MS-ARAB',
539
      'WINDOWS-1256',
540
      'CP1257',
541
      'WINBALTRIM',
542
      'WINDOWS-1257',
543
      'CP1258',
544
      'WINDOWS-1258',
545
      '850',
546
      'CP850',
547
      'IBM850',
548
      'CSPC850MULTILINGUAL',
549
      '862',
550
      'CP862',
551
      'IBM862',
552
      'CSPC862LATINHEBREW',
553
      '866',
554
      'CP866',
555
      'IBM866',
556
      'CSIBM866',
557
      'MAC',
558
      'MACINTOSH',
559
      'MACROMAN',
560
      'CSMACINTOSH',
561
      'MACCENTRALEUROPE',
562
      'MACICELAND',
563
      'MACCROATIAN',
564
      'MACROMANIA',
565
      'MACCYRILLIC',
566
      'MACUKRAINE',
567
      'MACGREEK',
568
      'MACTURKISH',
569
      'MACHEBREW',
570
      'MACARABIC',
571
      'MACTHAI',
572
      'HP-ROMAN8',
573
      'R8',
574
      'ROMAN8',
575
      'CSHPROMAN8',
576
      'NEXTSTEP',
577
      'ARMSCII-8',
578
      'GEORGIAN-ACADEMY',
579
      'GEORGIAN-PS',
580
      'KOI8-T',
581
      'CP154',
582
      'CYRILLIC-ASIAN',
583
      'PT154',
584
      'PTCP154',
585
      'CSPTCP154',
586
      'KZ-1048',
587
      'RK1048',
588
      'STRK1048-2002',
589
      'CSKZ1048',
590
      'MULELAO-1',
591
      'CP1133',
592
      'IBM-CP1133',
593
      'ISO-IR-166',
594
      'TIS-620',
595
      'TIS620',
596
      'TIS620-0',
597
      'TIS620.2529-1',
598
      'TIS620.2533-0',
599
      'TIS620.2533-1',
600
      'CP874',
601
      'WINDOWS-874',
602
      'VISCII',
603
      'VISCII1.1-1',
604
      'CSVISCII',
605
      'TCVN',
606
      'TCVN-5712',
607
      'TCVN5712-1',
608
      'TCVN5712-1:1993',
609
      'ISO-IR-14',
610
      'ISO646-JP',
611
      'JIS_C6220-1969-RO',
612
      'JP',
613
      'CSISO14JISC6220RO',
614
      'JISX0201-1976',
615
      'JIS_X0201',
616
      'X0201',
617
      'CSHALFWIDTHKATAKANA',
618
      'ISO-IR-87',
619
      'JIS0208',
620
      'JIS_C6226-1983',
621
      'JIS_X0208',
622
      'JIS_X0208-1983',
623
      'JIS_X0208-1990',
624
      'X0208',
625
      'CSISO87JISX0208',
626
      'ISO-IR-159',
627
      'JIS_X0212',
628
      'JIS_X0212-1990',
629
      'JIS_X0212.1990-0',
630
      'X0212',
631
      'CSISO159JISX02121990',
632
      'CN',
633
      'GB_1988-80',
634
      'ISO-IR-57',
635
      'ISO646-CN',
636
      'CSISO57GB1988',
637
      'CHINESE',
638
      'GB_2312-80',
639
      'ISO-IR-58',
640
      'CSISO58GB231280',
641
      'CN-GB-ISOIR165',
642
      'ISO-IR-165',
643
      'ISO-IR-149',
644
      'KOREAN',
645
      'KSC_5601',
646
      'KS_C_5601-1987',
647
      'KS_C_5601-1989',
648
      'CSKSC56011987',
649
      'EUC-JP',
650
      'EUCJP',
651
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
652
      'CSEUCPKDFMTJAPANESE',
653
      'MS_KANJI',
654
      'SHIFT-JIS',
655
      'SHIFT_JIS',
656
      'SJIS',
657
      'CSSHIFTJIS',
658
      'CP932',
659
      'ISO-2022-JP',
660
      'CSISO2022JP',
661
      'ISO-2022-JP-1',
662
      'ISO-2022-JP-2',
663
      'CSISO2022JP2',
664
      'CN-GB',
665
      'EUC-CN',
666
      'EUCCN',
667
      'GB2312',
668
      'CSGB2312',
669
      'GBK',
670
      'CP936',
671
      'MS936',
672
      'WINDOWS-936',
673
      'GB18030',
674
      'ISO-2022-CN',
675
      'CSISO2022CN',
676
      'ISO-2022-CN-EXT',
677
      'HZ',
678
      'HZ-GB-2312',
679
      'EUC-TW',
680
      'EUCTW',
681
      'CSEUCTW',
682
      'BIG-5',
683
      'BIG-FIVE',
684
      'BIG5',
685
      'BIGFIVE',
686
      'CN-BIG5',
687
      'CSBIG5',
688
      'CP950',
689
      'BIG5-HKSCS:1999',
690
      'BIG5-HKSCS:2001',
691
      'BIG5-HKSCS',
692
      'BIG5-HKSCS:2004',
693
      'BIG5HKSCS',
694
      'EUC-KR',
695
      'EUCKR',
696
      'CSEUCKR',
697
      'CP949',
698
      'UHC',
699
      'CP1361',
700
      'JOHAB',
701
      'ISO-2022-KR',
702
      'CSISO2022KR',
703
      'CP856',
704
      'CP922',
705
      'CP943',
706
      'CP1046',
707
      'CP1124',
708
      'CP1129',
709
      'CP1161',
710
      'IBM-1161',
711
      'IBM1161',
712
      'CSIBM1161',
713
      'CP1162',
714
      'IBM-1162',
715
      'IBM1162',
716
      'CSIBM1162',
717
      'CP1163',
718
      'IBM-1163',
719
      'IBM1163',
720
      'CSIBM1163',
721
      'DEC-KANJI',
722
      'DEC-HANYU',
723
      '437',
724
      'CP437',
725
      'IBM437',
726
      'CSPC8CODEPAGE437',
727
      'CP737',
728
      'CP775',
729
      'IBM775',
730
      'CSPC775BALTIC',
731
      '852',
732
      'CP852',
733
      'IBM852',
734
      'CSPCP852',
735
      'CP853',
736
      '855',
737
      'CP855',
738
      'IBM855',
739
      'CSIBM855',
740
      '857',
741
      'CP857',
742
      'IBM857',
743
      'CSIBM857',
744
      'CP858',
745
      '860',
746
      'CP860',
747
      'IBM860',
748
      'CSIBM860',
749
      '861',
750
      'CP-IS',
751
      'CP861',
752
      'IBM861',
753
      'CSIBM861',
754
      '863',
755
      'CP863',
756
      'IBM863',
757
      'CSIBM863',
758
      'CP864',
759
      'IBM864',
760
      'CSIBM864',
761
      '865',
762
      'CP865',
763
      'IBM865',
764
      'CSIBM865',
765
      '869',
766
      'CP-GR',
767
      'CP869',
768
      'IBM869',
769
      'CSIBM869',
770
      'CP1125',
771
      'EUC-JISX0213',
772
      'SHIFT_JISX0213',
773
      'ISO-2022-JP-3',
774
      'BIG5-2003',
775
      'ISO-IR-230',
776
      'TDS565',
777
      'ATARI',
778
      'ATARIST',
779
      'RISCOS-LATIN1',
780
  );
781
782
  /**
783
   * @var array
784
   */
785
  private static $support = array();
786
787
  /**
788
   * Constructor: runs the class-level support check via checkForSupport().
789
   */
790 1
  public function __construct()
  {
    // Instantiating the helper triggers the class-level support check.
    // What that check records is defined in checkForSupport(), which is
    // declared elsewhere in this class.
    self::checkForSupport();
  }
794
795
  /**
796
   * Returns a single UTF-8 character from string.
797
   *
798
   * @param    string $str A UTF-8 string.
799
   * @param    int    $pos The position of character to return.
800
   *
801
   * @return   string Single Multi-Byte character.
802
   */
803 1
  public static function access($str, $pos)
  {
    // $str[$pos]-like access for UTF-8 text: take a one-character slice
    // starting at $pos through the class's own substr() implementation.
    return self::substr($str, $pos, 1);
  }
809
810
  /**
811
   * Prepends BOM character to the string and returns the whole string.
812
   *
813
   * INFO: If BOM already existed there, the Input string is returned.
814
   *
815
   * @param    string $str The input string
816
   *
817
   * @return   string The output string that contains BOM
818
   */
819
  public static function add_bom_to_string($str)
  {
    // The BOM returned by bom() is three bytes long, so only the first
    // three bytes of the input are inspected (byte-wise substr()).
    if (!self::is_bom(substr($str, 0, 3))) {
      // No BOM present yet: prepend one.
      $str = self::bom() . $str;
    }

    return $str;
  }
827
828
  /**
829
   * Returns the Byte Order Mark Character.
830
   *
831
   * @return   string Byte Order Mark
832
   */
833 2
  public static function bom()
  {
    // EF BB BF is the UTF-8 encoding of U+FEFF, the byte order mark.
    return "\xEF\xBB\xBF";
  }
837
838
  /**
839
   * @alias of UTF8::chr_map()
840
   *
841
   * @param $callback
842
   * @param $str
843
   *
844
   * @return array
845
   */
846 1
  public static function callback($callback, $str)
  {
    // Pure alias: forwards both arguments unchanged to chr_map() and
    // returns whatever it returns.
    return self::chr_map($callback, $str);
  }
850
851
  /**
852
   * Returns an array of all lower and upper case UTF-8 encoded characters.
853
   *
854
   * @return   array An array with lower case chars as keys and upper chars as values.
855
   */
856
  protected static function case_table()
857
  {
858
    static $case = array(
859
860
      // lower => upper
861
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
862
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
863
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
864
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
865
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
866
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
867
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
868
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
869
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
870
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
871
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
872
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
873
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
874
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
875
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
876
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
877
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
878
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
879
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
880
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
881
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
882
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
883
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
884
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
885
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
886
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
887
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
888
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
889
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
890
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
891
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
892
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
893
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
894
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
895
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
896
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
897
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
898
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
899
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
900
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
901
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
902
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
903
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
904
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
905
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
906
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
907
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
908
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
909
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
910
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
911
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
912
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
913
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
914
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
915
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
916
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
917
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
918
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
919
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
920
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
921
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
922
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
923
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
924
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
925
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
926
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
927
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
928
      "\xea\x9e\x87"     => "\xea\x9e\x86",
929
      "\xea\x9e\x85"     => "\xea\x9e\x84",
930
      "\xea\x9e\x83"     => "\xea\x9e\x82",
931
      "\xea\x9e\x81"     => "\xea\x9e\x80",
932
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
933
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
934
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
935
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
936
      "\xea\x9d\xad"     => "\xea\x9d\xac",
937
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
938
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
939
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
940
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
941
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
942
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
943
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
944
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
945
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
946
      "\xea\x9d\x99"     => "\xea\x9d\x98",
947
      "\xea\x9d\x97"     => "\xea\x9d\x96",
948
      "\xea\x9d\x95"     => "\xea\x9d\x94",
949
      "\xea\x9d\x93"     => "\xea\x9d\x92",
950
      "\xea\x9d\x91"     => "\xea\x9d\x90",
951
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
952
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
953
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
954
      "\xea\x9d\x89"     => "\xea\x9d\x88",
955
      "\xea\x9d\x87"     => "\xea\x9d\x86",
956
      "\xea\x9d\x85"     => "\xea\x9d\x84",
957
      "\xea\x9d\x83"     => "\xea\x9d\x82",
958
      "\xea\x9d\x81"     => "\xea\x9d\x80",
959
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
960
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
961
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
962
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
963
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
964
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
965
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
966
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
967
      "\xea\x9c\xad"     => "\xea\x9c\xac",
968
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
969
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
970
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
971
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
972
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
973
      "\xea\x9a\x97"     => "\xea\x9a\x96",
974
      "\xea\x9a\x95"     => "\xea\x9a\x94",
975
      "\xea\x9a\x93"     => "\xea\x9a\x92",
976
      "\xea\x9a\x91"     => "\xea\x9a\x90",
977
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
978
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
979
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
980
      "\xea\x9a\x89"     => "\xea\x9a\x88",
981
      "\xea\x9a\x87"     => "\xea\x9a\x86",
982
      "\xea\x9a\x85"     => "\xea\x9a\x84",
983
      "\xea\x9a\x83"     => "\xea\x9a\x82",
984
      "\xea\x9a\x81"     => "\xea\x9a\x80",
985
      "\xea\x99\xad"     => "\xea\x99\xac",
986
      "\xea\x99\xab"     => "\xea\x99\xaa",
987
      "\xea\x99\xa9"     => "\xea\x99\xa8",
988
      "\xea\x99\xa7"     => "\xea\x99\xa6",
989
      "\xea\x99\xa5"     => "\xea\x99\xa4",
990
      "\xea\x99\xa3"     => "\xea\x99\xa2",
991
      "\xea\x99\x9f"     => "\xea\x99\x9e",
992
      "\xea\x99\x9d"     => "\xea\x99\x9c",
993
      "\xea\x99\x9b"     => "\xea\x99\x9a",
994
      "\xea\x99\x99"     => "\xea\x99\x98",
995
      "\xea\x99\x97"     => "\xea\x99\x96",
996
      "\xea\x99\x95"     => "\xea\x99\x94",
997
      "\xea\x99\x93"     => "\xea\x99\x92",
998
      "\xea\x99\x91"     => "\xea\x99\x90",
999
      "\xea\x99\x8f"     => "\xea\x99\x8e",
1000
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1001
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1002
      "\xea\x99\x89"     => "\xea\x99\x88",
1003
      "\xea\x99\x87"     => "\xea\x99\x86",
1004
      "\xea\x99\x85"     => "\xea\x99\x84",
1005
      "\xea\x99\x83"     => "\xea\x99\x82",
1006
      "\xea\x99\x81"     => "\xea\x99\x80",
1007
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1008
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1009
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1010
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1011
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1012
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1013
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1014
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1015
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1016
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1017
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1018
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1019
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1020
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1021
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1022
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1023
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1024
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1025
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1026
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1027
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1028
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1029
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1030
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1031
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1032
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1033
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1034
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1035
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1036
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1037
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1038
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1039
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1040
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1041
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1042
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1043
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1044
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1045
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1046
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1047
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1048
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1049
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1050
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1051
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1052
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1053
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1054
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1055
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1056
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1057
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1058
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1059
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1060
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1061
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1062
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1063
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1064
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1065
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1066
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1067
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1068
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1069
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1070
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1071
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1072
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1073
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1074
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1075
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1076
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1077
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1078
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1079
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1080
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1081
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1082
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1083
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1084
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1085
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1086
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1087
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1088
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1089
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1090
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1091
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1092
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1093
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1094
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1095
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1096
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1097
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1098
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1099
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1100
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1101
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1102
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1103
      "\xe2\xb1\xa5"     => "\xc8\xba",
1104
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1105
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1106
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1107
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1108
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1109
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1110
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1111
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1112
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1113
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1114
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1115
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1116
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1117
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1118
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1119
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1120
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1121
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1122
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1123
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1124
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1125
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1126
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1127
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1128
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1129
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1130
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1131
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1132
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1133
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1134
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1135
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1136
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1137
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1138
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1139
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1140
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1141
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1142
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1143
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1144
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1145
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1146
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1147
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1148
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1149
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1150
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1151
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1152
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1153
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1154
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1155
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1156
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1157
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1158
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1159
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1160
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1161
      "\xe1\xbe\xbe"     => "\xce\x99",
1162
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1163
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1164
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1165
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1166
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1167
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1168
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1169
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1170
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1171
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1172
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1173
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1174
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1175
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1176
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1177
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1178
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1179
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1180
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1181
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1182
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1183
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1184
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1185
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1186
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1187
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1188
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1189
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1190
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1191
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1192
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1193
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1194
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1195
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1196
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1197
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1198
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1199
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1200
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1201
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1202
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1203
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1204
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1205
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1206
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1207
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1208
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1209
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1210
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1211
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1212
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1213
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1214
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1215
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1216
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1217
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1218
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1219
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1220
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1221
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1222
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1223
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1224
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1225
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1226
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1227
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1228
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1229
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1230
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1231
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1232
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1233
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1234
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1235
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1236
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1237
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1238
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1239
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1240
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1241
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1242
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1243
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1244
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1245
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1246
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1247
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1248
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1249
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1250
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1251
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1252
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1253
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1254
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1255
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1256
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1257
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1258
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1259
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1260
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1261
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1262
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1263
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1264
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1265
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1266
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1267
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1268
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1269
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1270
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1271
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1272
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1273
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1274
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1275
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1276
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1277
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1278
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1279
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1280
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1281
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1282
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1283
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1284
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1285
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1286
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1287
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1288
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1289
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1290
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1291
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1292
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1293
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1294
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1295
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1296
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1297
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1298
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1299
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1300
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1301
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1302
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1303
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1304
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1305
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1306
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1307
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1308
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1309
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1310
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1311
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1312
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1313
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1314
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1315
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1316
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1317
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1318
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1319
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1320
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1321
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1322
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1323
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1324
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1325
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1326
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1327
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1328
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1329
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1330
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1331
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1332
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1333
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1334
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1335
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1336
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1337
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1338
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1339
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1340
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1341
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1342
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1343
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1344
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1345
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1346
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1347
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1348
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1349
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1350
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1351
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1352
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1353
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1354
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1355
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1356
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1357
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1358
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1359
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1360
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1361
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1362
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1363
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1364
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1365
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1366
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1367
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1368
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1369
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1370
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1371
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1372
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1373
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1374
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1375
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1376
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1377
      "\xd6\x86"         => "\xd5\x96",
1378
      "\xd6\x85"         => "\xd5\x95",
1379
      "\xd6\x84"         => "\xd5\x94",
1380
      "\xd6\x83"         => "\xd5\x93",
1381
      "\xd6\x82"         => "\xd5\x92",
1382
      "\xd6\x81"         => "\xd5\x91",
1383
      "\xd6\x80"         => "\xd5\x90",
1384
      "\xd5\xbf"         => "\xd5\x8f",
1385
      "\xd5\xbe"         => "\xd5\x8e",
1386
      "\xd5\xbd"         => "\xd5\x8d",
1387
      "\xd5\xbc"         => "\xd5\x8c",
1388
      "\xd5\xbb"         => "\xd5\x8b",
1389
      "\xd5\xba"         => "\xd5\x8a",
1390
      "\xd5\xb9"         => "\xd5\x89",
1391
      "\xd5\xb8"         => "\xd5\x88",
1392
      "\xd5\xb7"         => "\xd5\x87",
1393
      "\xd5\xb6"         => "\xd5\x86",
1394
      "\xd5\xb5"         => "\xd5\x85",
1395
      "\xd5\xb4"         => "\xd5\x84",
1396
      "\xd5\xb3"         => "\xd5\x83",
1397
      "\xd5\xb2"         => "\xd5\x82",
1398
      "\xd5\xb1"         => "\xd5\x81",
1399
      "\xd5\xb0"         => "\xd5\x80",
1400
      "\xd5\xaf"         => "\xd4\xbf",
1401
      "\xd5\xae"         => "\xd4\xbe",
1402
      "\xd5\xad"         => "\xd4\xbd",
1403
      "\xd5\xac"         => "\xd4\xbc",
1404
      "\xd5\xab"         => "\xd4\xbb",
1405
      "\xd5\xaa"         => "\xd4\xba",
1406
      "\xd5\xa9"         => "\xd4\xb9",
1407
      "\xd5\xa8"         => "\xd4\xb8",
1408
      "\xd5\xa7"         => "\xd4\xb7",
1409
      "\xd5\xa6"         => "\xd4\xb6",
1410
      "\xd5\xa5"         => "\xd4\xb5",
1411
      "\xd5\xa4"         => "\xd4\xb4",
1412
      "\xd5\xa3"         => "\xd4\xb3",
1413
      "\xd5\xa2"         => "\xd4\xb2",
1414
      "\xd5\xa1"         => "\xd4\xb1",
1415
      "\xd4\xa5"         => "\xd4\xa4",
1416
      "\xd4\xa3"         => "\xd4\xa2",
1417
      "\xd4\xa1"         => "\xd4\xa0",
1418
      "\xd4\x9f"         => "\xd4\x9e",
1419
      "\xd4\x9d"         => "\xd4\x9c",
1420
      "\xd4\x9b"         => "\xd4\x9a",
1421
      "\xd4\x99"         => "\xd4\x98",
1422
      "\xd4\x97"         => "\xd4\x96",
1423
      "\xd4\x95"         => "\xd4\x94",
1424
      "\xd4\x93"         => "\xd4\x92",
1425
      "\xd4\x91"         => "\xd4\x90",
1426
      "\xd4\x8f"         => "\xd4\x8e",
1427
      "\xd4\x8d"         => "\xd4\x8c",
1428
      "\xd4\x8b"         => "\xd4\x8a",
1429
      "\xd4\x89"         => "\xd4\x88",
1430
      "\xd4\x87"         => "\xd4\x86",
1431
      "\xd4\x85"         => "\xd4\x84",
1432
      "\xd4\x83"         => "\xd4\x82",
1433
      "\xd4\x81"         => "\xd4\x80",
1434
      "\xd3\xbf"         => "\xd3\xbe",
1435
      "\xd3\xbd"         => "\xd3\xbc",
1436
      "\xd3\xbb"         => "\xd3\xba",
1437
      "\xd3\xb9"         => "\xd3\xb8",
1438
      "\xd3\xb7"         => "\xd3\xb6",
1439
      "\xd3\xb5"         => "\xd3\xb4",
1440
      "\xd3\xb3"         => "\xd3\xb2",
1441
      "\xd3\xb1"         => "\xd3\xb0",
1442
      "\xd3\xaf"         => "\xd3\xae",
1443
      "\xd3\xad"         => "\xd3\xac",
1444
      "\xd3\xab"         => "\xd3\xaa",
1445
      "\xd3\xa9"         => "\xd3\xa8",
1446
      "\xd3\xa7"         => "\xd3\xa6",
1447
      "\xd3\xa5"         => "\xd3\xa4",
1448
      "\xd3\xa3"         => "\xd3\xa2",
1449
      "\xd3\xa1"         => "\xd3\xa0",
1450
      "\xd3\x9f"         => "\xd3\x9e",
1451
      "\xd3\x9d"         => "\xd3\x9c",
1452
      "\xd3\x9b"         => "\xd3\x9a",
1453
      "\xd3\x99"         => "\xd3\x98",
1454
      "\xd3\x97"         => "\xd3\x96",
1455
      "\xd3\x95"         => "\xd3\x94",
1456
      "\xd3\x93"         => "\xd3\x92",
1457
      "\xd3\x91"         => "\xd3\x90",
1458
      "\xd3\x8f"         => "\xd3\x80",
1459
      "\xd3\x8e"         => "\xd3\x8d",
1460
      "\xd3\x8c"         => "\xd3\x8b",
1461
      "\xd3\x8a"         => "\xd3\x89",
1462
      "\xd3\x88"         => "\xd3\x87",
1463
      "\xd3\x86"         => "\xd3\x85",
1464
      "\xd3\x84"         => "\xd3\x83",
1465
      "\xd3\x82"         => "\xd3\x81",
1466
      "\xd2\xbf"         => "\xd2\xbe",
1467
      "\xd2\xbd"         => "\xd2\xbc",
1468
      "\xd2\xbb"         => "\xd2\xba",
1469
      "\xd2\xb9"         => "\xd2\xb8",
1470
      "\xd2\xb7"         => "\xd2\xb6",
1471
      "\xd2\xb5"         => "\xd2\xb4",
1472
      "\xd2\xb3"         => "\xd2\xb2",
1473
      "\xd2\xb1"         => "\xd2\xb0",
1474
      "\xd2\xaf"         => "\xd2\xae",
1475
      "\xd2\xad"         => "\xd2\xac",
1476
      "\xd2\xab"         => "\xd2\xaa",
1477
      "\xd2\xa9"         => "\xd2\xa8",
1478
      "\xd2\xa7"         => "\xd2\xa6",
1479
      "\xd2\xa5"         => "\xd2\xa4",
1480
      "\xd2\xa3"         => "\xd2\xa2",
1481
      "\xd2\xa1"         => "\xd2\xa0",
1482
      "\xd2\x9f"         => "\xd2\x9e",
1483
      "\xd2\x9d"         => "\xd2\x9c",
1484
      "\xd2\x9b"         => "\xd2\x9a",
1485
      "\xd2\x99"         => "\xd2\x98",
1486
      "\xd2\x97"         => "\xd2\x96",
1487
      "\xd2\x95"         => "\xd2\x94",
1488
      "\xd2\x93"         => "\xd2\x92",
1489
      "\xd2\x91"         => "\xd2\x90",
1490
      "\xd2\x8f"         => "\xd2\x8e",
1491
      "\xd2\x8d"         => "\xd2\x8c",
1492
      "\xd2\x8b"         => "\xd2\x8a",
1493
      "\xd2\x81"         => "\xd2\x80",
1494
      "\xd1\xbf"         => "\xd1\xbe",
1495
      "\xd1\xbd"         => "\xd1\xbc",
1496
      "\xd1\xbb"         => "\xd1\xba",
1497
      "\xd1\xb9"         => "\xd1\xb8",
1498
      "\xd1\xb7"         => "\xd1\xb6",
1499
      "\xd1\xb5"         => "\xd1\xb4",
1500
      "\xd1\xb3"         => "\xd1\xb2",
1501
      "\xd1\xb1"         => "\xd1\xb0",
1502
      "\xd1\xaf"         => "\xd1\xae",
1503
      "\xd1\xad"         => "\xd1\xac",
1504
      "\xd1\xab"         => "\xd1\xaa",
1505
      "\xd1\xa9"         => "\xd1\xa8",
1506
      "\xd1\xa7"         => "\xd1\xa6",
1507
      "\xd1\xa5"         => "\xd1\xa4",
1508
      "\xd1\xa3"         => "\xd1\xa2",
1509
      "\xd1\xa1"         => "\xd1\xa0",
1510
      "\xd1\x9f"         => "\xd0\x8f",
1511
      "\xd1\x9e"         => "\xd0\x8e",
1512
      "\xd1\x9d"         => "\xd0\x8d",
1513
      "\xd1\x9c"         => "\xd0\x8c",
1514
      "\xd1\x9b"         => "\xd0\x8b",
1515
      "\xd1\x9a"         => "\xd0\x8a",
1516
      "\xd1\x99"         => "\xd0\x89",
1517
      "\xd1\x98"         => "\xd0\x88",
1518
      "\xd1\x97"         => "\xd0\x87",
1519
      "\xd1\x96"         => "\xd0\x86",
1520
      "\xd1\x95"         => "\xd0\x85",
1521
      "\xd1\x94"         => "\xd0\x84",
1522
      "\xd1\x93"         => "\xd0\x83",
1523
      "\xd1\x92"         => "\xd0\x82",
1524
      "\xd1\x91"         => "\xd0\x81",
1525
      "\xd1\x90"         => "\xd0\x80",
1526
      "\xd1\x8f"         => "\xd0\xaf",
1527
      "\xd1\x8e"         => "\xd0\xae",
1528
      "\xd1\x8d"         => "\xd0\xad",
1529
      "\xd1\x8c"         => "\xd0\xac",
1530
      "\xd1\x8b"         => "\xd0\xab",
1531
      "\xd1\x8a"         => "\xd0\xaa",
1532
      "\xd1\x89"         => "\xd0\xa9",
1533
      "\xd1\x88"         => "\xd0\xa8",
1534
      "\xd1\x87"         => "\xd0\xa7",
1535
      "\xd1\x86"         => "\xd0\xa6",
1536
      "\xd1\x85"         => "\xd0\xa5",
1537
      "\xd1\x84"         => "\xd0\xa4",
1538
      "\xd1\x83"         => "\xd0\xa3",
1539
      "\xd1\x82"         => "\xd0\xa2",
1540
      "\xd1\x81"         => "\xd0\xa1",
1541
      "\xd1\x80"         => "\xd0\xa0",
1542
      "\xd0\xbf"         => "\xd0\x9f",
1543
      "\xd0\xbe"         => "\xd0\x9e",
1544
      "\xd0\xbd"         => "\xd0\x9d",
1545
      "\xd0\xbc"         => "\xd0\x9c",
1546
      "\xd0\xbb"         => "\xd0\x9b",
1547
      "\xd0\xba"         => "\xd0\x9a",
1548
      "\xd0\xb9"         => "\xd0\x99",
1549
      "\xd0\xb8"         => "\xd0\x98",
1550
      "\xd0\xb7"         => "\xd0\x97",
1551
      "\xd0\xb6"         => "\xd0\x96",
1552
      "\xd0\xb5"         => "\xd0\x95",
1553
      "\xd0\xb4"         => "\xd0\x94",
1554
      "\xd0\xb3"         => "\xd0\x93",
1555
      "\xd0\xb2"         => "\xd0\x92",
1556
      "\xd0\xb1"         => "\xd0\x91",
1557
      "\xd0\xb0"         => "\xd0\x90",
1558
      "\xcf\xbb"         => "\xcf\xba",
1559
      "\xcf\xb8"         => "\xcf\xb7",
1560
      "\xcf\xb5"         => "\xce\x95",
1561
      "\xcf\xb2"         => "\xcf\xb9",
1562
      "\xcf\xb1"         => "\xce\xa1",
1563
      "\xcf\xb0"         => "\xce\x9a",
1564
      "\xcf\xaf"         => "\xcf\xae",
1565
      "\xcf\xad"         => "\xcf\xac",
1566
      "\xcf\xab"         => "\xcf\xaa",
1567
      "\xcf\xa9"         => "\xcf\xa8",
1568
      "\xcf\xa7"         => "\xcf\xa6",
1569
      "\xcf\xa5"         => "\xcf\xa4",
1570
      "\xcf\xa3"         => "\xcf\xa2",
1571
      "\xcf\xa1"         => "\xcf\xa0",
1572
      "\xcf\x9f"         => "\xcf\x9e",
1573
      "\xcf\x9d"         => "\xcf\x9c",
1574
      "\xcf\x9b"         => "\xcf\x9a",
1575
      "\xcf\x99"         => "\xcf\x98",
1576
      "\xcf\x97"         => "\xcf\x8f",
1577
      "\xcf\x96"         => "\xce\xa0",
1578
      "\xcf\x95"         => "\xce\xa6",
1579
      "\xcf\x91"         => "\xce\x98",
1580
      "\xcf\x90"         => "\xce\x92",
1581
      "\xcf\x8e"         => "\xce\x8f",
1582
      "\xcf\x8d"         => "\xce\x8e",
1583
      "\xcf\x8c"         => "\xce\x8c",
1584
      "\xcf\x8b"         => "\xce\xab",
1585
      "\xcf\x8a"         => "\xce\xaa",
1586
      "\xcf\x89"         => "\xce\xa9",
1587
      "\xcf\x88"         => "\xce\xa8",
1588
      "\xcf\x87"         => "\xce\xa7",
1589
      "\xcf\x86"         => "\xce\xa6",
1590
      "\xcf\x85"         => "\xce\xa5",
1591
      "\xcf\x84"         => "\xce\xa4",
1592
      "\xcf\x83"         => "\xce\xa3",
1593
      "\xcf\x82"         => "\xce\xa3",
1594
      "\xcf\x81"         => "\xce\xa1",
1595
      "\xcf\x80"         => "\xce\xa0",
1596
      "\xce\xbf"         => "\xce\x9f",
1597
      "\xce\xbe"         => "\xce\x9e",
1598
      "\xce\xbd"         => "\xce\x9d",
1599
      "\xce\xbc"         => "\xce\x9c",
1600
      "\xce\xbb"         => "\xce\x9b",
1601
      "\xce\xba"         => "\xce\x9a",
1602
      "\xce\xb9"         => "\xce\x99",
1603
      "\xce\xb8"         => "\xce\x98",
1604
      "\xce\xb7"         => "\xce\x97",
1605
      "\xce\xb6"         => "\xce\x96",
1606
      "\xce\xb5"         => "\xce\x95",
1607
      "\xce\xb4"         => "\xce\x94",
1608
      "\xce\xb3"         => "\xce\x93",
1609
      "\xce\xb2"         => "\xce\x92",
1610
      "\xce\xb1"         => "\xce\x91",
1611
      "\xce\xaf"         => "\xce\x8a",
1612
      "\xce\xae"         => "\xce\x89",
1613
      "\xce\xad"         => "\xce\x88",
1614
      "\xce\xac"         => "\xce\x86",
1615
      "\xcd\xbd"         => "\xcf\xbf",
1616
      "\xcd\xbc"         => "\xcf\xbe",
1617
      "\xcd\xbb"         => "\xcf\xbd",
1618
      "\xcd\xb7"         => "\xcd\xb6",
1619
      "\xcd\xb3"         => "\xcd\xb2",
1620
      "\xcd\xb1"         => "\xcd\xb0",
1621
      "\xca\x92"         => "\xc6\xb7",
1622
      "\xca\x8c"         => "\xc9\x85",
1623
      "\xca\x8b"         => "\xc6\xb2",
1624
      "\xca\x8a"         => "\xc6\xb1",
1625
      "\xca\x89"         => "\xc9\x84",
1626
      "\xca\x88"         => "\xc6\xae",
1627
      "\xca\x83"         => "\xc6\xa9",
1628
      "\xca\x80"         => "\xc6\xa6",
1629
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1630
      "\xc9\xb5"         => "\xc6\x9f",
1631
      "\xc9\xb2"         => "\xc6\x9d",
1632
      "\xc9\xb1"         => "\xe2\xb1\xae",
1633
      "\xc9\xaf"         => "\xc6\x9c",
1634
      "\xc9\xab"         => "\xe2\xb1\xa2",
1635
      "\xc9\xa9"         => "\xc6\x96",
1636
      "\xc9\xa8"         => "\xc6\x97",
1637
      "\xc9\xa5"         => "\xea\x9e\x8d",
1638
      "\xc9\xa3"         => "\xc6\x94",
1639
      "\xc9\xa0"         => "\xc6\x93",
1640
      "\xc9\x9b"         => "\xc6\x90",
1641
      "\xc9\x99"         => "\xc6\x8f",
1642
      "\xc9\x97"         => "\xc6\x8a",
1643
      "\xc9\x96"         => "\xc6\x89",
1644
      "\xc9\x94"         => "\xc6\x86",
1645
      "\xc9\x93"         => "\xc6\x81",
1646
      "\xc9\x92"         => "\xe2\xb1\xb0",
1647
      "\xc9\x91"         => "\xe2\xb1\xad",
1648
      "\xc9\x90"         => "\xe2\xb1\xaf",
1649
      "\xc9\x8f"         => "\xc9\x8e",
1650
      "\xc9\x8d"         => "\xc9\x8c",
1651
      "\xc9\x8b"         => "\xc9\x8a",
1652
      "\xc9\x89"         => "\xc9\x88",
1653
      "\xc9\x87"         => "\xc9\x86",
1654
      "\xc9\x82"         => "\xc9\x81",
1655
      "\xc9\x80"         => "\xe2\xb1\xbf",
1656
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1657
      "\xc8\xbc"         => "\xc8\xbb",
1658
      "\xc8\xb3"         => "\xc8\xb2",
1659
      "\xc8\xb1"         => "\xc8\xb0",
1660
      "\xc8\xaf"         => "\xc8\xae",
1661
      "\xc8\xad"         => "\xc8\xac",
1662
      "\xc8\xab"         => "\xc8\xaa",
1663
      "\xc8\xa9"         => "\xc8\xa8",
1664
      "\xc8\xa7"         => "\xc8\xa6",
1665
      "\xc8\xa5"         => "\xc8\xa4",
1666
      "\xc8\xa3"         => "\xc8\xa2",
1667
      "\xc8\x9f"         => "\xc8\x9e",
1668
      "\xc8\x9d"         => "\xc8\x9c",
1669
      "\xc8\x9b"         => "\xc8\x9a",
1670
      "\xc8\x99"         => "\xc8\x98",
1671
      "\xc8\x97"         => "\xc8\x96",
1672
      "\xc8\x95"         => "\xc8\x94",
1673
      "\xc8\x93"         => "\xc8\x92",
1674
      "\xc8\x91"         => "\xc8\x90",
1675
      "\xc8\x8f"         => "\xc8\x8e",
1676
      "\xc8\x8d"         => "\xc8\x8c",
1677
      "\xc8\x8b"         => "\xc8\x8a",
1678
      "\xc8\x89"         => "\xc8\x88",
1679
      "\xc8\x87"         => "\xc8\x86",
1680
      "\xc8\x85"         => "\xc8\x84",
1681
      "\xc8\x83"         => "\xc8\x82",
1682
      "\xc8\x81"         => "\xc8\x80",
1683
      "\xc7\xbf"         => "\xc7\xbe",
1684
      "\xc7\xbd"         => "\xc7\xbc",
1685
      "\xc7\xbb"         => "\xc7\xba",
1686
      "\xc7\xb9"         => "\xc7\xb8",
1687
      "\xc7\xb5"         => "\xc7\xb4",
1688
      "\xc7\xb3"         => "\xc7\xb2",
1689
      "\xc7\xaf"         => "\xc7\xae",
1690
      "\xc7\xad"         => "\xc7\xac",
1691
      "\xc7\xab"         => "\xc7\xaa",
1692
      "\xc7\xa9"         => "\xc7\xa8",
1693
      "\xc7\xa7"         => "\xc7\xa6",
1694
      "\xc7\xa5"         => "\xc7\xa4",
1695
      "\xc7\xa3"         => "\xc7\xa2",
1696
      "\xc7\xa1"         => "\xc7\xa0",
1697
      "\xc7\x9f"         => "\xc7\x9e",
1698
      "\xc7\x9d"         => "\xc6\x8e",
1699
      "\xc7\x9c"         => "\xc7\x9b",
1700
      "\xc7\x9a"         => "\xc7\x99",
1701
      "\xc7\x98"         => "\xc7\x97",
1702
      "\xc7\x96"         => "\xc7\x95",
1703
      "\xc7\x94"         => "\xc7\x93",
1704
      "\xc7\x92"         => "\xc7\x91",
1705
      "\xc7\x90"         => "\xc7\x8f",
1706
      "\xc7\x8e"         => "\xc7\x8d",
1707
      "\xc7\x8c"         => "\xc7\x8b",
1708
      "\xc7\x89"         => "\xc7\x88",
1709
      "\xc7\x86"         => "\xc7\x85",
1710
      "\xc6\xbf"         => "\xc7\xb7",
1711
      "\xc6\xbd"         => "\xc6\xbc",
1712
      "\xc6\xb9"         => "\xc6\xb8",
1713
      "\xc6\xb6"         => "\xc6\xb5",
1714
      "\xc6\xb4"         => "\xc6\xb3",
1715
      "\xc6\xb0"         => "\xc6\xaf",
1716
      "\xc6\xad"         => "\xc6\xac",
1717
      "\xc6\xa8"         => "\xc6\xa7",
1718
      "\xc6\xa5"         => "\xc6\xa4",
1719
      "\xc6\xa3"         => "\xc6\xa2",
1720
      "\xc6\xa1"         => "\xc6\xa0",
1721
      "\xc6\x9e"         => "\xc8\xa0",
1722
      "\xc6\x9a"         => "\xc8\xbd",
1723
      "\xc6\x99"         => "\xc6\x98",
1724
      "\xc6\x95"         => "\xc7\xb6",
1725
      "\xc6\x92"         => "\xc6\x91",
1726
      "\xc6\x8c"         => "\xc6\x8b",
1727
      "\xc6\x88"         => "\xc6\x87",
1728
      "\xc6\x85"         => "\xc6\x84",
1729
      "\xc6\x83"         => "\xc6\x82",
1730
      "\xc6\x80"         => "\xc9\x83",
1731
      "\xc5\xbf"         => "\x53",
1732
      "\xc5\xbe"         => "\xc5\xbd",
1733
      "\xc5\xbc"         => "\xc5\xbb",
1734
      "\xc5\xba"         => "\xc5\xb9",
1735
      "\xc5\xb7"         => "\xc5\xb6",
1736
      "\xc5\xb5"         => "\xc5\xb4",
1737
      "\xc5\xb3"         => "\xc5\xb2",
1738
      "\xc5\xb1"         => "\xc5\xb0",
1739
      "\xc5\xaf"         => "\xc5\xae",
1740
      "\xc5\xad"         => "\xc5\xac",
1741
      "\xc5\xab"         => "\xc5\xaa",
1742
      "\xc5\xa9"         => "\xc5\xa8",
1743
      "\xc5\xa7"         => "\xc5\xa6",
1744
      "\xc5\xa5"         => "\xc5\xa4",
1745
      "\xc5\xa3"         => "\xc5\xa2",
1746
      "\xc5\xa1"         => "\xc5\xa0",
1747
      "\xc5\x9f"         => "\xc5\x9e",
1748
      "\xc5\x9d"         => "\xc5\x9c",
1749
      "\xc5\x9b"         => "\xc5\x9a",
1750
      "\xc5\x99"         => "\xc5\x98",
1751
      "\xc5\x97"         => "\xc5\x96",
1752
      "\xc5\x95"         => "\xc5\x94",
1753
      "\xc5\x93"         => "\xc5\x92",
1754
      "\xc5\x91"         => "\xc5\x90",
1755
      "\xc5\x8f"         => "\xc5\x8e",
1756
      "\xc5\x8d"         => "\xc5\x8c",
1757
      "\xc5\x8b"         => "\xc5\x8a",
1758
      "\xc5\x88"         => "\xc5\x87",
1759
      "\xc5\x86"         => "\xc5\x85",
1760
      "\xc5\x84"         => "\xc5\x83",
1761
      "\xc5\x82"         => "\xc5\x81",
1762
      "\xc5\x80"         => "\xc4\xbf",
1763
      "\xc4\xbe"         => "\xc4\xbd",
1764
      "\xc4\xbc"         => "\xc4\xbb",
1765
      "\xc4\xba"         => "\xc4\xb9",
1766
      "\xc4\xb7"         => "\xc4\xb6",
1767
      "\xc4\xb5"         => "\xc4\xb4",
1768
      "\xc4\xb3"         => "\xc4\xb2",
1769
      "\xc4\xb1"         => "\x49",
1770
      "\xc4\xaf"         => "\xc4\xae",
1771
      "\xc4\xad"         => "\xc4\xac",
1772
      "\xc4\xab"         => "\xc4\xaa",
1773
      "\xc4\xa9"         => "\xc4\xa8",
1774
      "\xc4\xa7"         => "\xc4\xa6",
1775
      "\xc4\xa5"         => "\xc4\xa4",
1776
      "\xc4\xa3"         => "\xc4\xa2",
1777
      "\xc4\xa1"         => "\xc4\xa0",
1778
      "\xc4\x9f"         => "\xc4\x9e",
1779
      "\xc4\x9d"         => "\xc4\x9c",
1780
      "\xc4\x9b"         => "\xc4\x9a",
1781
      "\xc4\x99"         => "\xc4\x98",
1782
      "\xc4\x97"         => "\xc4\x96",
1783
      "\xc4\x95"         => "\xc4\x94",
1784
      "\xc4\x93"         => "\xc4\x92",
1785
      "\xc4\x91"         => "\xc4\x90",
1786
      "\xc4\x8f"         => "\xc4\x8e",
1787
      "\xc4\x8d"         => "\xc4\x8c",
1788
      "\xc4\x8b"         => "\xc4\x8a",
1789
      "\xc4\x89"         => "\xc4\x88",
1790
      "\xc4\x87"         => "\xc4\x86",
1791
      "\xc4\x85"         => "\xc4\x84",
1792
      "\xc4\x83"         => "\xc4\x82",
1793
      "\xc4\x81"         => "\xc4\x80",
1794
      "\xc3\xbf"         => "\xc5\xb8",
1795
      "\xc3\xbe"         => "\xc3\x9e",
1796
      "\xc3\xbd"         => "\xc3\x9d",
1797
      "\xc3\xbc"         => "\xc3\x9c",
1798
      "\xc3\xbb"         => "\xc3\x9b",
1799
      "\xc3\xba"         => "\xc3\x9a",
1800
      "\xc3\xb9"         => "\xc3\x99",
1801
      "\xc3\xb8"         => "\xc3\x98",
1802
      "\xc3\xb6"         => "\xc3\x96",
1803
      "\xc3\xb5"         => "\xc3\x95",
1804
      "\xc3\xb4"         => "\xc3\x94",
1805
      "\xc3\xb3"         => "\xc3\x93",
1806
      "\xc3\xb2"         => "\xc3\x92",
1807
      "\xc3\xb1"         => "\xc3\x91",
1808
      "\xc3\xb0"         => "\xc3\x90",
1809
      "\xc3\xaf"         => "\xc3\x8f",
1810
      "\xc3\xae"         => "\xc3\x8e",
1811
      "\xc3\xad"         => "\xc3\x8d",
1812
      "\xc3\xac"         => "\xc3\x8c",
1813
      "\xc3\xab"         => "\xc3\x8b",
1814
      "\xc3\xaa"         => "\xc3\x8a",
1815
      "\xc3\xa9"         => "\xc3\x89",
1816
      "\xc3\xa8"         => "\xc3\x88",
1817
      "\xc3\xa7"         => "\xc3\x87",
1818
      "\xc3\xa6"         => "\xc3\x86",
1819
      "\xc3\xa5"         => "\xc3\x85",
1820
      "\xc3\xa4"         => "\xc3\x84",
1821
      "\xc3\xa3"         => "\xc3\x83",
1822
      "\xc3\xa2"         => "\xc3\x82",
1823
      "\xc3\xa1"         => "\xc3\x81",
1824
      "\xc3\xa0"         => "\xc3\x80",
1825
      "\xc2\xb5"         => "\xce\x9c",
1826
      "\x7a"             => "\x5a",
1827
      "\x79"             => "\x59",
1828
      "\x78"             => "\x58",
1829
      "\x77"             => "\x57",
1830
      "\x76"             => "\x56",
1831
      "\x75"             => "\x55",
1832
      "\x74"             => "\x54",
1833
      "\x73"             => "\x53",
1834
      "\x72"             => "\x52",
1835
      "\x71"             => "\x51",
1836
      "\x70"             => "\x50",
1837
      "\x6f"             => "\x4f",
1838
      "\x6e"             => "\x4e",
1839
      "\x6d"             => "\x4d",
1840
      "\x6c"             => "\x4c",
1841
      "\x6b"             => "\x4b",
1842
      "\x6a"             => "\x4a",
1843
      "\x69"             => "\x49",
1844
      "\x68"             => "\x48",
1845
      "\x67"             => "\x47",
1846
      "\x66"             => "\x46",
1847
      "\x65"             => "\x45",
1848
      "\x64"             => "\x44",
1849
      "\x63"             => "\x43",
1850
      "\x62"             => "\x42",
1851
      "\x61"             => "\x41",
1852
1853
    );
1854
1855
    return $case;
1856
  }
1857
1858
  /**
1859
   * check for UTF8-Support
1860
   */
1861 157
  public static function checkForSupport()
  {
    // The probe results are cached in self::$support; the presence of the
    // 'mbstring' key is used as the "already probed" marker.
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1871
1872
  /**
1873
   * Generates a UTF-8 encoded character from the given code point.
1874
   *
1875
   * @param    int $code_point The code point for which to generate a character.
1876
   *
1877
   * @return   string Multi-Byte character, returns empty string on failure to encode.
1878
   */
1879 8
  public static function chr($code_point)
  {
    self::checkForSupport();

    $i = (int)$code_point;

    if ($i !== $code_point) {
      // The argument was not a plain integer (e.g. a string), so try to
      // extract the integer code point via hex_to_int().
      $i = (int)self::hex_to_int($code_point);

      if (!$i) {
        return '';
      }
    }

    // Let the entity decoder build the character from a numeric entity.
    return self::html_entity_decode("&#{$i};", ENT_QUOTES);
  }
1892
1893
  /**
1894
   * Applies callback to all characters of a string.
1895
   *
1896
   * @param    string $callback The callback function.
1897
   * @param    string $str      UTF-8 string to run callback on.
1898
   *
1899
   * @return   array The outcome of callback.
1900
   */
1901
1902 1
  public static function chr_map($callback, $str)
  {
    // Split into single characters, then run the callback on each of them.
    return array_map($callback, self::split($str));
  }
1908
1909
  /**
1910
   * Generates an array of byte length of each character of a Unicode string.
1911
   *
1912
   * 1 byte => U+0000  - U+007F
1913
   * 2 byte => U+0080  - U+07FF
1914
   * 3 byte => U+0800  - U+FFFF
1915
   * 4 byte => U+10000 - U+10FFFF
1916
   *
1917
   * @param    string $str The original Unicode string.
1918
   *
1919
   * @return   array An array of byte lengths of each character.
1920
   */
1921 2
  public static function chr_size_list($str)
  {
    // Cast first and test for emptiness explicitly: a plain truthiness check
    // ("!$str") would also reject the non-empty string "0" and wrongly
    // report that it contains no characters.
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // strlen() counts bytes, so mapping it over the single characters
    // yields the byte length of each character.
    return array_map('strlen', self::split($str));
  }
1929
1930
  /**
1931
   * Get a decimal code representation of a specific character.
1932
   *
1933
   * @param   string $chr The input character
1934
   *
1935
   * @return  int
1936
   */
1937 2
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;
    $lead = self::ord($chr[0]);

    // 0xxxxxxx: single-byte character, the byte is the code itself.
    if (($lead & 0x80) === 0) {
      return $lead;
    }

    // sequence length => array(mask to test, expected marker bits)
    $layouts = array(
        2 => array(0xe0, 0xc0), // 110xxxxx
        3 => array(0xf0, 0xe0), // 1110xxxx
        4 => array(0xf8, 0xf0), // 11110xxx
    );

    $length = 1;
    $value = $lead;

    foreach ($layouts as $size => $layout) {
      if (($lead & $layout[0]) === $layout[1]) {
        $length = $size;
        // Strip the marker bits, keep the payload bits of the lead byte.
        $value = $lead & ~$layout[1];
        break;
      }
    }

    // 10xxxxxx: append the payload of every following byte.
    for ($pos = 1; $pos < $length; $pos++) {
      $value = ($value << 6) + (self::ord($chr[$pos]) & ~0x80);
    }

    return $value;
  }
1969
1970
  /**
1971
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
1972
   *
1973
   * @param    string $chr The input character
1974
   * @param    string $pfix
1975
   *
1976
   * @return   string The code point encoded as U+xxxx
1977
   */
1978
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    // Resolve the character to its integer code first, then format it.
    $code = self::ord($chr);

    return self::int_to_hex($code, $pfix);
  }
1982
1983
  /**
1984
   * Splits a string into smaller chunks and multiple lines, using the specified
1985
   * line ending character.
1986
   *
1987
   * @param    string $body     The original string to be split.
1988
   * @param    int    $chunklen The maximum character length of a chunk.
1989
   * @param    string $end      The character(s) to be inserted at the end of each chunk.
1990
   *
1991
   * @return   string The chunked string
1992
   */
1993 1
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    // $end is placed between the chunks only; nothing is appended after the last one.
    return implode($end, $chunks);
  }
1997
1998
  /**
1999
   * accepts a string and removes all non-UTF-8 characters from it.
2000
   *
2001
   * @param string $str                     The string to be sanitized.
2002
   * @param bool   $remove_bom
2003
   * @param bool   $normalize_whitespace
2004
   * @param bool   $normalize_msword        e.g.: "…" => "..."
2005
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
2006
   *
2007
   * @return string Clean UTF-8 encoded string
2008
   */
2009 35
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // Based on:
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // The unbounded variant caused connection reset problems on larger
    // strings, hence the {1,100} repetition limit below.
    //
    // Group 1 captures runs of well-formed sequences; groups 2 and 3 match
    // stray bytes. Replacing every match with '$1' keeps only group 1.
    $validUtf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    $str = preg_replace($validUtf8Pattern, '$1', $str);
    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    // Optional post-processing steps, applied in this fixed order.
    if (true === $normalize_whitespace) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $str = self::normalize_msword($str);
    }

    if (true === $remove_bom) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
2044
2045
  /**
2046
   * Clean up a string so that only printable UTF-8 chars remain at the end.
2047
   *
2048
   * @param string|false $str
2049
   *
2050
   * @return string
2051
   */
2052 3
  public static function cleanup($str)
  {
    $str = (string)$str;

    // Nothing to do for an empty string.
    if ($str === '') {
      return '';
    }

    // Repair ISO <-> UTF-8 encoding errors first.
    $repaired = self::fix_simple_utf8($str);

    // Then, via clean():
    //  - remove all non UTF-8 symbols
    //  - remove the diamond question mark (�)
    //  - remove invisible characters (e.g. "\0")
    //  - remove the BOM
    //  - normalize whitespace chars (but keep non-breaking-spaces)
    $cleaned = self::clean($repaired, true, true, false, true);

    return (string)$cleaned;
  }
2072
2073
  /**
2074
   * Accepts a string and returns an array of Unicode code points.
2075
   *
2076
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings.
2077
   * @param    bool  $u_style If True, will return code points in U+xxxx format,
2078
   *                          default, code points will be returned as integers.
2079
   *
2080
   * @return   array The array of code points
2081
   */
2082 3
  public static function codepoints($arg, $u_style = false)
  {
    // A string is split into single characters; anything else is used as given.
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $points = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'ord',
        ),
        $chars
    );

    if (!$u_style) {
      return $points;
    }

    // Convert every integer code point through int_to_hex().
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'int_to_hex',
        ),
        $points
    );
  }
2108
2109
  /**
2110
   * Returns count of characters used in a string.
2111
   *
2112
   * @param    string $str The input string.
2113
   *
2114
   * @return   array An associative array of Character as keys and
2115
   *           their count as values.
2116
   */
2117 3
  public static function count_chars($str) // there is no $mode parameter
  {
    // Tally how often each character occurs ...
    $occurrences = array_count_values(self::split($str));

    // ... and order the tally by character (the array key).
    ksort($occurrences);

    return $occurrences;
  }
2125
2126
  /**
2127
   * Get a UTF-8 character from its decimal code representation.
2128
   *
2129
   * @param   int $code Code.
2130
   *
2131
   * @return  string
2132
   */
2133 1
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    // Build a hexadecimal numeric entity and let mbstring decode it.
    $entity = '&#x' . dechex($code) . ';';

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2143
2144
  /**
2145
   * Encode to UTF8 or LATIN1.
2146
   *
2147
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
2148
   *        so you can also call this function on a UTF-8 string without messing up the string.
2149
   *
2150
   * @param string $encodingLabel ISO-8859-1 || UTF-8
2151
   * @param string $str
2152
   *
2153
   * @return false|string Will return false on error.
2154
   */
2155 11
  public static function encode($encodingLabel, $str)
  {
    $target = self::normalizeEncoding($encodingLabel);

    // Only these two target encodings are handled; anything else is an error.
    if ($target !== 'UTF-8' && $target !== 'ISO-8859-1') {
      return false;
    }

    return $target === 'UTF-8' ? self::to_utf8($str) : self::to_latin1($str);
  }
2169
2170
  /**
2171
   * Callback function for preg_replace_callback use.
2172
   *
2173
   * @param  array $matches PREG matches
2174
   *
2175
   * @return string
2176
   */
2177
  protected static function entityCallback($matches)
  {
    self::checkForSupport();

    $decoded = mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    // A decoded single quote is handed back as a numeric entity instead of
    // the raw character.
    return $decoded === "'" ? '&#x27;' : $decoded;
  }
2189
2190
  /**
2191
   * Reads entire file into a string.
2192
   *
2193
   * WARNING: do not use the UTF-8 option for binary files (e.g.: images) !!!
2194
   *
2195
   * @link http://php.net/manual/en/function.file-get-contents.php
2196
   *
2197
   * @param string   $filename      <p>
2198
   *                                Name of the file to read.
2199
   *                                </p>
2200
   * @param int      $flags         [optional] <p>
2201
   *                                Prior to PHP 6, this parameter is called
2202
   *                                use_include_path and is a bool.
2203
   *                                As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
2204
   *                                to trigger include path
2205
   *                                search.
2206
   *                                </p>
2207
   *                                <p>
2208
   *                                The value of flags can be any combination of
2209
   *                                the following flags (with some restrictions), joined with the
2210
   *                                binary OR (|)
2211
   *                                operator.
2212
   *                                </p>
2213
   *                                <p>
2214
   *                                <table>
2215
   *                                Available flags
2216
   *                                <tr valign="top">
2217
   *                                <td>Flag</td>
2218
   *                                <td>Description</td>
2219
   *                                </tr>
2220
   *                                <tr valign="top">
2221
   *                                <td>
2222
   *                                FILE_USE_INCLUDE_PATH
2223
   *                                </td>
2224
   *                                <td>
2225
   *                                Search for filename in the include directory.
2226
   *                                See include_path for more
2227
   *                                information.
2228
   *                                </td>
2229
   *                                </tr>
2230
   *                                <tr valign="top">
2231
   *                                <td>
2232
   *                                FILE_TEXT
2233
   *                                </td>
2234
   *                                <td>
2235
   *                                As of PHP 6, the default encoding of the read
2236
   *                                data is UTF-8. You can specify a different encoding by creating a
2237
   *                                custom context or by changing the default using
2238
   *                                stream_default_encoding. This flag cannot be
2239
   *                                used with FILE_BINARY.
2240
   *                                </td>
2241
   *                                </tr>
2242
   *                                <tr valign="top">
2243
   *                                <td>
2244
   *                                FILE_BINARY
2245
   *                                </td>
2246
   *                                <td>
2247
   *                                With this flag, the file is read in binary mode. This is the default
2248
   *                                setting and cannot be used with FILE_TEXT.
2249
   *                                </td>
2250
   *                                </tr>
2251
   *                                </table>
2252 2
   *                                </p>
2253
   * @param resource $context       [optional] <p>
2254
   *                                A valid context resource created with
2255 2
   *                                stream_context_create. If you don't need to use a
2256 2
   *                                custom context, you can skip this parameter by &null;.
2257
   *                                </p>
2258 2
   * @param int      $offset        [optional] <p>
2259 2
   *                                The offset where the reading starts.
2260
   *                                </p>
2261
   * @param int      $maxlen        [optional] <p>
2262
   *                                Maximum length of data read. The default is to read until end
2263 2
   *                                of file is reached.
2264 2
   *                                </p>
2265
   * @param int      $timeout
2266 2
   *
2267 2
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non
2268
   *                                default utf-8 chars
2269 2
   *
2270 1
   * @return string|false The function returns the read data or false on failure.
2271 1
   */
2272 2
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // Without a caller-supplied context, create one that carries the HTTP timeout.
    if ($context === null && $timeout) {
      $httpOptions = array(
          'timeout' => $timeout,
      );
      $context = stream_context_create(
          array(
              'http' => $httpOptions,
          )
      );
    }

    // $maxlen is only forwarded when it really is an integer.
    $data = is_int($maxlen)
        ? file_get_contents($filename, $flags, $context, $offset, $maxlen)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    // Raw mode: hand back the bytes untouched.
    if ($convertToUtf8 !== true) {
      return $data;
    }

    self::checkForSupport();

    $encoding = self::str_detect_encoding($data);

    if ($encoding && $encoding !== 'UTF-8') {
      $data = mb_convert_encoding(
          $data,
          'UTF-8',
          self::normalizeEncoding($encoding)
      );
    }

    // clean utf-8 string
    return self::cleanup($data);
  }
2319
2320
  /**
2321
   * Checks if a file starts with BOM character.
2322
   *
2323 7
   * @param    string $file_path Path to a valid file.
2324
   *
2325 7
   * @return   bool True if the file has BOM at the start, False otherwise.
2326 7
   */
2327 2
  public static function file_has_bom($file_path)
  {
    // Read only the first three bytes of the file and hand them to is_bom().
    // The offset must be 0: since PHP 7.1 a negative offset counts from the
    // END of the stream, so the former "-1" read the last byte of the file
    // instead of its beginning.
    $head = file_get_contents($file_path, false, null, 0, 3);

    // An unreadable file cannot be said to start with a BOM.
    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
2331 2
2332 7
  /**
2333 1
   * Normalizes to UTF-8 NFC, converting from CP-1252 when needed.
2334 1
   *
2335 1
   * @param mixed  $var
2336 1
   * @param int    $normalization_form
2337 7
   * @param string $leading_combining
2338 7
   *
2339
   * @return mixed
2340
   */
2341
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $type = gettype($var);

    // Arrays: filter every element recursively, keys are preserved.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively.
    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Every other non-string type is returned untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Strings without any byte >= 0x80 need no normalization.
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty placeholder so that the isset() check below succeeds.
      $normalized = '-';
    } else {
      $normalized = Normalizer::normalize($var, $normalization_form);

      // If normalization yields nothing usable, fall back to encode().
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
2384
2385
  /**
2386
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from CP-1252 when needed.
2387
   *
2388
   * @param int    $type
2389
   * @param string $var
2390
   * @param int    $filter
2391
   * @param mixed  $option
2392
   *
2393
   * @return mixed
2394
   */
2395 View Code Duplication
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only when the caller actually supplied it.
    $filtered = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($filtered);
  }
2405
2406
  /**
2407
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from CP-1252 when needed.
2408
   *
2409
   * @param int   $type
2410
   * @param mixed $definition
2411
   * @param bool  $add_empty
2412
   *
2413
   * @return mixed
2414
   */
2415 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // Forward the optional arguments only when the caller supplied at least $definition.
    $filtered = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($filtered);
  }
2425 1
2426
  /**
2427
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from CP-1252 when needed.
2428
   *
2429
   * @param mixed $var
2430
   * @param int   $filter
2431
   * @param mixed $option
2432
   *
2433
   * @return mixed
2434
   */
2435 View Code Duplication
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2436
  {
2437 1
    if (3 > func_num_args()) {
2438
      $var = filter_var($var, $filter);
2439 1
    } else {
2440 1
      $var = filter_var($var, $filter, $option);
2441 1
    }
2442 1
2443
    return self::filter($var);
2444
  }
2445 1
2446
  /**
   * Wrapper for "filter_var_array()" whose result is passed through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from CP-1252 when needed).
   *
   * @param array $data       the values to filter
   * @param mixed $definition forwarded only when the caller passes at least two arguments
   * @param bool  $add_empty  forwarded together with $definition
   *
   * @return mixed the result of UTF8::filter() applied to the filtered data
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument, let filter_var_array() use its own defaults.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($filtered);
  }
2465
2466
  /**
   * Checks that a string is no longer than the given number of characters,
   * with the length measured by UTF8::strlen().
   *
   * @param    string $str      The string to measure.
   * @param    int    $box_size The maximum allowed length.
   *
   * @return   bool true if the length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2479
2480 8
  /**
   * Repairs a broken UTF-8 string by replacing every key of the
   * self::$brokenUtf8ToUtf8 map with its corresponding value.
   *
   * @param string $str
   *
   * @return string the repaired string ('' for empty input)
   */
  public static function fix_simple_utf8($str)
  {
    // Search/replace lists are derived from the map once and cached across calls.
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2505
2506
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively) and returned as arrays.
   *
   * @param array|string $str
   *
   * @return array|string
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    // Keep decoding and re-encoding until a pass no longer changes the value.
    $previous = '';
    while ($previous != $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
2533 1
2534 1
  /**
   * Get the text direction of a specific character.
   *
   * The character is converted to its code point via chr_to_decimal() and
   * looked up in fixed tables of right-to-left code points.
   *
   * @param   string $chr Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($chr)
  {
    // Right-to-left code points up to and including U+085E: exact values ...
    static $lowSingles = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
    );
    // ... and inclusive [from, to] ranges.
    static $lowRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
    );
    // Right-to-left code points from U+FB1D upwards: exact values ...
    static $highSingles = array(
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );
    // ... and inclusive [from, to] ranges.
    static $highRanges = array(
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    $c = static::chr_to_decimal($chr);

    // Everything outside U+05BE..U+10B7F is left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Select the table that applies to this part of the code point space.
    if (0x85e >= $c) {
      $singles = $lowSingles;
      $ranges = $lowRanges;
    } elseif (0x200f === $c) {
      return 'RTL';
    } elseif (0xfb1d <= $c) {
      $singles = $highSingles;
      $ranges = $highRanges;
    } else {
      return 'LTR';
    }

    // Exact values are matched strictly (type and value).
    if (in_array($c, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2633
2634
  /**
   * Load data from "/data/<file>.php" (relative to this class file).
   *
   * @param string $file base name of the data file, without directory or extension
   *
   * @return bool|string|array|int the value returned by the data file, false if the file does not exist
   */
  protected static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2651
2652 1
  /**
   * Creates a random string of UTF-8 characters.
   *
   * The character pool is built on the first call and cached for later calls.
   * Characters are picked with mt_rand(), so the result is not suitable
   * for security-sensitive purposes.
   *
   * @param    int $len The length of string in characters (non-integer values are truncated).
   *
   * @return   string String consisting of random characters, '' if $len is not positive.
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // Normalise to an integer first: the countdown loop below only stops at
    // exactly zero, so a fractional length (e.g. 2.5) would never terminate.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (empty($chars)) {
      if (self::$support['pcre_utf8'] === true) {
        // Build the pool from code points 48-79 ...
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // ... blank out everything that is not a number or a cased letter ...
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // ... and drop the blanked (and other falsy) entries, re-indexing from 0.
        $chars = array_values(array_filter($chars));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for (; $len > 0; --$len) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
2699
2700
  /**
   * Converts hexadecimal U+xxxx code point representation to Integer.
   *
   * Accepts 4 to 6 hexadecimal digits, optionally prefixed with "U+" or "\u"
   * (case-insensitive).
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits are allowed: intval() stops at the first invalid
    // digit, so accepting e.g. "12xy" would return 0x12 instead of failing.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2717 1
2718 1
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * e.g.: &#123;&#39;&#1740;
   *
   * The string is split with UTF8::split() and every piece is passed to
   * UTF8::single_chr_html_encode(); the results are concatenated.
   *
   * @param  string $str The Unicode string to be encoded as numbered entities.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str)
  {
    $encoder = array(
        '\\voku\\helper\\UTF8',
        'single_chr_html_encode',
    );

    $pieces = self::split($str);

    return implode(array_map($encoder, $pieces));
  }
2739
2740
  /**
2741
   * UTF-8 version of html_entity_decode()
2742
   *
2743
   * The reason we are not using html_entity_decode() by itself is because
2744
   * while it is not technically correct to leave out the semicolon
2745
   * at the end of an entity most browsers will still interpret the entity
2746
   * correctly. html_entity_decode() does not convert entities without
2747
   * semicolons, so we are left with our own little solution here. Bummer.
2748
   *
2749
   * Convert all HTML entities to their applicable characters
2750
   *
2751
   * @link http://php.net/manual/en/function.html-entity-decode.php
2752
   *
2753
   * @param string $str      <p>
2754
   *                         The input string.
2755
   *                         </p>
2756
   * @param int    $flags    [optional] <p>
2757
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2758
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2759
   *                         <table>
2760
   *                         Available <i>flags</i> constants
2761
   *                         <tr valign="top">
2762
   *                         <td>Constant Name</td>
2763
   *                         <td>Description</td>
2764
   *                         </tr>
2765
   *                         <tr valign="top">
2766
   *                         <td><b>ENT_COMPAT</b></td>
2767
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2768
   *                         </tr>
2769
   *                         <tr valign="top">
2770
   *                         <td><b>ENT_QUOTES</b></td>
2771
   *                         <td>Will convert both double and single quotes.</td>
2772
   *                         </tr>
2773
   *                         <tr valign="top">
2774
   *                         <td><b>ENT_NOQUOTES</b></td>
2775
   *                         <td>Will leave both double and single quotes unconverted.</td>
2776
   *                         </tr>
2777
   *                         <tr valign="top">
2778
   *                         <td><b>ENT_HTML401</b></td>
2779
   *                         <td>
2780
   *                         Handle code as HTML 4.01.
2781
   *                         </td>
2782
   *                         </tr>
2783
   *                         <tr valign="top">
2784
   *                         <td><b>ENT_XML1</b></td>
2785
   *                         <td>
2786
   *                         Handle code as XML 1.
2787
   *                         </td>
2788
   *                         </tr>
2789
   *                         <tr valign="top">
2790 15
   *                         <td><b>ENT_XHTML</b></td>
2791
   *                         <td>
2792 15
   *                         Handle code as XHTML.
2793
   *                         </td>
2794 15
   *                         </tr>
2795 3
   *                         <tr valign="top">
2796
   *                         <td><b>ENT_HTML5</b></td>
2797
   *                         <td>
2798 15
   *                         Handle code as HTML 5.
2799 4
   *                         </td>
2800
   *                         </tr>
2801
   *                         </table>
2802 15
   *                         </p>
2803 3
   * @param string $encoding [optional] <p>
2804 3
   *                         Encoding to use.
2805 3
   *                         </p>
2806
   *
2807
   * @return string the decoded string.
2808 3
   */
2809
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Nothing to do for an empty string ...
    if (!isset($str[0])) {
      return '';
    }

    // ... or for a string without any ampersand.
    if (strpos($str, '&') === false) {
      return $str;
    }

    // Default flags: ENT_HTML5 is only added when Bootup::is_php('5.4') reports true.
    if ($flags === null) {
      $flags = ENT_COMPAT;

      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_COMPAT | ENT_HTML5;
      }
    }

    // Repeat until a full pass leaves the string unchanged, so that
    // multiply-encoded entities are decoded as well.
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", array('\voku\helper\UTF8', 'entityCallback'), $str);

      // decode numeric & UTF16 two byte entities
      // (a missing trailing semicolon is added first)
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      $str = html_entity_decode($withSemicolons, $flags, $encoding);
    } while ($before !== $str);

    return $str;
  }
2845
2846
  /**
2847
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2848
   *
2849
   * @link http://php.net/manual/en/function.htmlentities.php
2850
   *
2851
   * @param string $str           <p>
2852
   *                              The input string.
2853
   *                              </p>
2854
   * @param int    $flags         [optional] <p>
2855
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2856
   *                              invalid code unit sequences and the used document type. The default is
2857
   *                              ENT_COMPAT | ENT_HTML401.
2858
   *                              <table>
2859
   *                              Available <i>flags</i> constants
2860
   *                              <tr valign="top">
2861
   *                              <td>Constant Name</td>
2862
   *                              <td>Description</td>
2863
   *                              </tr>
2864
   *                              <tr valign="top">
2865
   *                              <td><b>ENT_COMPAT</b></td>
2866
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2867
   *                              </tr>
2868
   *                              <tr valign="top">
2869
   *                              <td><b>ENT_QUOTES</b></td>
2870
   *                              <td>Will convert both double and single quotes.</td>
2871
   *                              </tr>
2872
   *                              <tr valign="top">
2873
   *                              <td><b>ENT_NOQUOTES</b></td>
2874
   *                              <td>Will leave both double and single quotes unconverted.</td>
2875
   *                              </tr>
2876
   *                              <tr valign="top">
2877
   *                              <td><b>ENT_IGNORE</b></td>
2878
   *                              <td>
2879
   *                              Silently discard invalid code unit sequences instead of returning
2880
   *                              an empty string. Using this flag is discouraged as it
2881
   *                              may have security implications.
2882
   *                              </td>
2883
   *                              </tr>
2884
   *                              <tr valign="top">
2885
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2886
   *                              <td>
2887
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2888
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2889
   *                              </td>
2890
   *                              </tr>
2891
   *                              <tr valign="top">
2892
   *                              <td><b>ENT_DISALLOWED</b></td>
2893
   *                              <td>
2894
   *                              Replace invalid code points for the given document type with a
2895
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2896
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2897
   *                              instance, to ensure the well-formedness of XML documents with
2898
   *                              embedded external content.
2899
   *                              </td>
2900
   *                              </tr>
2901
   *                              <tr valign="top">
2902
   *                              <td><b>ENT_HTML401</b></td>
2903
   *                              <td>
2904
   *                              Handle code as HTML 4.01.
2905
   *                              </td>
2906
   *                              </tr>
2907
   *                              <tr valign="top">
2908
   *                              <td><b>ENT_XML1</b></td>
2909
   *                              <td>
2910
   *                              Handle code as XML 1.
2911
   *                              </td>
2912
   *                              </tr>
2913
   *                              <tr valign="top">
2914
   *                              <td><b>ENT_XHTML</b></td>
2915
   *                              <td>
2916
   *                              Handle code as XHTML.
2917
   *                              </td>
2918
   *                              </tr>
2919
   *                              <tr valign="top">
2920
   *                              <td><b>ENT_HTML5</b></td>
2921
   *                              <td>
2922
   *                              Handle code as HTML 5.
2923
   *                              </td>
2924
   *                              </tr>
2925
   *                              </table>
2926
   *                              </p>
2927
   * @param string $encoding      [optional] <p>
2928
   *                              Like <b>htmlspecialchars</b>,
2929
   *                              <b>htmlentities</b> takes an optional third argument
2930
   *                              <i>encoding</i> which defines encoding used in
2931
   *                              conversion.
2932
   *                              Although this argument is technically optional, you are highly
2933
   *                              encouraged to specify the correct value for your code.
2934
   *                              </p>
2935
   * @param bool   $double_encode [optional] <p>
2936
   *                              When <i>double_encode</i> is turned off PHP will not
2937
   *                              encode existing html entities. The default is to convert everything.
2938
   *                              </p>
2939
   *
2940
   *
2941
   * @return string the encoded string.
2942
   * </p>
2943
   * <p>
2944
   * If the input <i>string</i> contains an invalid code unit
2945
   * sequence within the given <i>encoding</i> an empty string
2946
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2947
   * <b>ENT_SUBSTITUTE</b> flags are set.
2948
   */
2949
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Plain delegation to PHP's htmlentities(); only the default encoding differs (UTF-8).
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
2953
2954
  /**
2955
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
2956
   *
2957
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2958
   *
2959
   * @param string $str           <p>
2960
   *                              The string being converted.
2961
   *                              </p>
2962
   * @param int    $flags         [optional] <p>
2963
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2964
   *                              invalid code unit sequences and the used document type. The default is
2965
   *                              ENT_COMPAT | ENT_HTML401.
2966
   *                              <table>
2967
   *                              Available <i>flags</i> constants
2968
   *                              <tr valign="top">
2969
   *                              <td>Constant Name</td>
2970
   *                              <td>Description</td>
2971
   *                              </tr>
2972
   *                              <tr valign="top">
2973
   *                              <td><b>ENT_COMPAT</b></td>
2974
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2975
   *                              </tr>
2976
   *                              <tr valign="top">
2977
   *                              <td><b>ENT_QUOTES</b></td>
2978
   *                              <td>Will convert both double and single quotes.</td>
2979
   *                              </tr>
2980
   *                              <tr valign="top">
2981
   *                              <td><b>ENT_NOQUOTES</b></td>
2982
   *                              <td>Will leave both double and single quotes unconverted.</td>
2983
   *                              </tr>
2984
   *                              <tr valign="top">
2985
   *                              <td><b>ENT_IGNORE</b></td>
2986
   *                              <td>
2987
   *                              Silently discard invalid code unit sequences instead of returning
2988
   *                              an empty string. Using this flag is discouraged as it
2989
   *                              may have security implications.
2990
   *                              </td>
2991
   *                              </tr>
2992
   *                              <tr valign="top">
2993
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2994
   *                              <td>
2995
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2996
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2997
   *                              </td>
2998
   *                              </tr>
2999
   *                              <tr valign="top">
3000
   *                              <td><b>ENT_DISALLOWED</b></td>
3001
   *                              <td>
3002
   *                              Replace invalid code points for the given document type with a
3003
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3004
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3005
   *                              instance, to ensure the well-formedness of XML documents with
3006
   *                              embedded external content.
3007
   *                              </td>
3008
   *                              </tr>
3009
   *                              <tr valign="top">
3010
   *                              <td><b>ENT_HTML401</b></td>
3011
   *                              <td>
3012
   *                              Handle code as HTML 4.01.
3013
   *                              </td>
3014
   *                              </tr>
3015
   *                              <tr valign="top">
3016
   *                              <td><b>ENT_XML1</b></td>
3017
   *                              <td>
3018
   *                              Handle code as XML 1.
3019
   *                              </td>
3020
   *                              </tr>
3021
   *                              <tr valign="top">
3022
   *                              <td><b>ENT_XHTML</b></td>
3023
   *                              <td>
3024
   *                              Handle code as XHTML.
3025
   *                              </td>
3026
   *                              </tr>
3027
   *                              <tr valign="top">
3028
   *                              <td><b>ENT_HTML5</b></td>
3029
   *                              <td>
3030
   *                              Handle code as HTML 5.
3031
   *                              </td>
3032
   *                              </tr>
3033
   *                              </table>
3034
   *                              </p>
3035
   * @param string $encoding      [optional] <p>
3036
   *                              Defines encoding used in conversion.
3037
   *                              </p>
3038
   *                              <p>
3039
   *                              For the purposes of this function, the encodings
3040
   *                              ISO-8859-1, ISO-8859-15,
3041
   *                              UTF-8, cp866,
3042
   *                              cp1251, cp1252, and
3043
   *                              KOI8-R are effectively equivalent, provided the
3044
   *                              <i>string</i> itself is valid for the encoding, as
3045
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3046
   *                              the same positions in all of these encodings.
3047
   *                              </p>
3048
   * @param bool   $double_encode [optional] <p>
3049
   *                              When <i>double_encode</i> is turned off PHP will not
3050
   *                              encode existing html entities, the default is to convert everything.
3051
   *                              </p>
3052
   *
3053
   * @return string The converted string.
3054
   * </p>
3055
   * <p>
3056
   * If the input <i>string</i> contains an invalid code unit
3057
   * sequence within the given <i>encoding</i> an empty string
3058
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3059
   * <b>ENT_SUBSTITUTE</b> flags are set.
3060
   */
3061
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Plain delegation to PHP's htmlspecialchars(); only the default encoding differs (UTF-8).
    $encoded = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
3065
3066
  /**
   * Checks whether the "iconv" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function iconv_loaded()
  {
    if (extension_loaded('iconv')) {
      return true;
    }

    return false;
  }
3075
3076
  /**
   * Converts Integer to hexadecimal U+xxxx code point representation.
   *
   * The hexadecimal digits are lower-case and zero-padded to at least four digits.
   *
   * @param    int    $int The integer to be converted to hexadecimal code point.
   * @param    string $pfix Prefix put in front of the hexadecimal digits.
   *
   * @return   string The code point, or empty string on failure.
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not purely decimal digits counts as a failure.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    $hex = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $hex;
  }
3096
3097
  /**
   * Checks whether the "intl" extension is loaded.
   *
   * @return   bool True if available, False otherwise
   */
  public static function intl_loaded()
  {
    if (extension_loaded('intl')) {
      return true;
    }

    return false;
  }
3106
3107
  /**
   * Alias of UTF8::is_ascii().
   *
   * @param string $str
   *
   * @return boolean the result of UTF8::is_ascii($str)
   */
  public static function isAscii($str)
  {
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
3118
3119
  /**
   * Alias of UTF8::is_base64().
   *
   * @param string $str
   *
   * @return bool the result of UTF8::is_base64($str)
   */
  public static function isBase64($str)
  {
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
3130
3131
  /**
   * Alias of UTF8::is_bom().
   *
   * @param string $utf8_chr
   *
   * @return boolean the result of UTF8::is_bom($utf8_chr)
   */
  public static function isBom($utf8_chr)
  {
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
3142
3143
  /**
   * Try to check if a string is a json-string...
   *
   * Only input that json_decode() turns into an object, without a JSON
   * error, is accepted.
   *
   * @param $str
   *
   * @return bool
   *
   * @deprecated
   */
  public static function isJson($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = json_decode($str);

    return is_object($decoded) && json_last_error() === JSON_ERROR_NONE;
  }
3170
3171
  /**
   * Alias of UTF8::is_utf8().
   *
   * @param string $str
   *
   * @return bool the result of UTF8::is_utf8($str)
   */
  public static function isUtf8($str)
  {
    $isUtf8 = self::is_utf8($str);

    return $isUtf8;
  }
3182
3183
  /**
   * Checks if a string is 7 bit ASCII, i.e. contains no byte in the range 0x80-0xFF.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if no high byte was found<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    // The pattern matches as soon as a single non-ASCII byte is present.
    $hasHighByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighByte;
  }
3195
3196
  /**
   * Tells whether a string is base64 encoded.
   *
   * The check is a round trip: the input is strictly decoded and encoded
   * again, and the result has to be identical to the input.
   *
   * @param string $str The value to check; it is cast to string first.
   *
   * @return bool True if $str survives the round trip, false otherwise
   *              (the empty string is never considered base64).
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
3217
3218
  /**
   * Check if the input looks like binary data (heuristic).
   *
   * The input is reported as binary when
   * - it consists solely of the characters "0" and "1", or
   * - it contains at least one NUL byte ("\x00").
   *
   * NOTE: there is intentionally no "share of non-printable bytes" test here.
   * The former check counted occurrences of the literal four-character text
   * '^ -~'; such occurrences can never make up more than 25% of the input, so
   * its "> 0.3" threshold was unreachable and the check never fired. A real
   * ratio heuristic would also flag ordinary multi-byte UTF-8 text, so it was
   * removed rather than "repaired".
   *
   * @param string $input The data to inspect.
   *
   * @return bool True if the input looks binary, false otherwise.
   */
  public static function is_binary($input)
  {
    // A string of only "0"/"1" digits is treated as a textual bit pattern.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // A NUL byte is taken as the marker for binary content.
    return substr_count($input, "\x00") > 0;
  }
3242
3243
  /**
   * Check if the file is binary.
   *
   * Only the first 512 bytes of the file are read and handed to
   * "UTF8::is_binary()". If the file cannot be opened or read, the check runs
   * on an empty string instead.
   *
   * @param string $file Path of the file to inspect.
   *
   * @return bool Whatever "UTF8::is_binary()" returns for the bytes read.
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      // "rb": read the raw bytes, without any text-mode translation.
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing.
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Only reached when an error handler turns PHP warnings into exceptions.
      $block = '';
    }

    // Close the handle on every path ("finally" is not available on PHP 5.3/5.4).
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
3262
3263
  /**
   * Checks if the given string is exactly the value returned by "UTF8::bom()".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you want to look for a BOM inside a longer string.
   *
   * @param    string $utf8_chr The input string.
   *
   * @return   bool True if $utf8_chr is identical to the byte order mark, false otherwise.
   */
  public static function is_bom($utf8_chr)
  {
    $bom = self::bom();

    return $bom === $utf8_chr;
  }
3276
3277
  /**
   * Check if the string is UTF-16.
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    return self::detectUtfByteOrder($str, 'UTF-16LE', 'UTF-16BE');
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    return self::detectUtfByteOrder($str, 'UTF-32LE', 'UTF-32BE');
  }

  /**
   * Shared implementation of "is_utf16()" / "is_utf32()": scores the input
   * against the little-endian and the big-endian variant of an encoding and
   * reports which of the two scored higher.
   *
   * @param string $str        The string to check.
   * @param string $encodingLE mbstring name of the little-endian variant.
   * @param string $encodingBE mbstring name of the big-endian variant.
   *
   * @return int|false 1 if the little-endian score is higher, 2 if the
   *                   big-endian score is higher, false if the scores are
   *                   equal or "UTF8::is_binary()" rejects the input.
   */
  private static function detectUtfByteOrder($str, $encodingLE, $encodingBE)
  {
    // Only input that "looks binary" is examined at all.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    $maybeLE = self::utfRoundTripScore($str, $encodingLE);
    $maybeBE = self::utfRoundTripScore($str, $encodingBE);

    // A tie (including "both zero") gives no evidence for either byte order.
    if ($maybeLE === $maybeBE) {
      return false;
    }

    return $maybeLE > $maybeBE ? 1 : 2;
  }

  /**
   * Scores how plausible it is that $str is encoded in $encoding.
   *
   * The input is converted to UTF-8, back to $encoding and to UTF-8 again.
   * Only if both UTF-8 results are identical, the entries reported by
   * "UTF8::count_chars()" for the converted text are compared with those
   * reported for the raw input; every hit increases the score by one.
   *
   * NOTE(review): in_array() searches the *values* of count_chars($str) for
   * the *keys* of count_chars($test3). This is kept exactly as it was; verify
   * against count_chars() whether a key lookup was intended.
   *
   * @param string $str      The raw input.
   * @param string $encoding mbstring encoding name, e.g. "UTF-16LE".
   *
   * @return int The number of hits, 0 if the round trip failed.
   */
  private static function utfRoundTripScore($str, $encoding)
  {
    $score = 0;

    $test = mb_convert_encoding($str, 'UTF-8', $encoding);
    if ($test === false || strlen($test) <= 1) {
      return $score;
    }

    $test2 = mb_convert_encoding($test, $encoding, 'UTF-8');
    $test3 = mb_convert_encoding($test2, 'UTF-8', $encoding);

    // The round trip has to reproduce the converted text byte for byte.
    if ($test3 !== $test) {
      return $score;
    }

    $strChars = self::count_chars($str);
    foreach (self::count_chars($test3) as $test3char => $test3charEmpty) {
      if (in_array($test3char, $strChars, true) === true) {
        $score++;
      }
    }

    return $score;
  }
3386
3387
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * The string is validated octet by octet with a small state machine; no
   * PCRE UTF-8 support is required. Rejected are: illegal lead octets, broken
   * or truncated multi-octet sequences (also at the very end of the string),
   * non-shortest forms, 5 and 6 octet sequences, surrogates (U+D800..U+DFFF)
   * and code points above U+10FFFF.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param    string $str The string to be checked; it is cast to string first.
   *
   * @return   bool True if $str is empty or well-formed UTF-8, false otherwise.
   */
  public static function is_utf8($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    $mState = 0; // number of continuation octets still expected
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the lead octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence. Always illegal; instead of
          // resynchronizing, the sequence is consumed and rejected by the
          // "4 < $mBytes" check once it is complete.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence: only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x3F) << $shift;

      if (0 === --$mState) {
        // End of the multi-octet sequence, $mUcs4 holds the decoded code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            ((2 === $mBytes) && ($mUcs4 < 0x0080)) ||
            ((3 === $mBytes) && ($mUcs4 < 0x0800)) ||
            ((4 === $mBytes) && ($mUcs4 < 0x10000)) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            ($mUcs4 >= 0xD800 && $mUcs4 <= 0xDFFF) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A sequence that is still open at the end of the input is truncated.
    return $mState === 0;
  }
3520
3521
  /**
   * Decodes a JSON string after passing it through "UTF8::filter()".
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    The JSON string being decoded.
   * @param bool   $assoc   [optional] When true, returned objects are converted
   *                        into associative arrays.
   * @param int    $depth   [optional] User specified recursion depth.
   * @param int    $options [optional] Bitmask of JSON decode options. It is only
   *                        handed to the native json_decode() when
   *                        "Bootup::is_php('5.4')" reports true.
   *
   * @return mixed The result of the native json_decode() call, i.e. the decoded
   *               value, or null if the input cannot be decoded or is nested
   *               deeper than $depth.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    // The fourth parameter is only passed on when the version check succeeds.
    if (Bootup::is_php('5.4') === true) {
      return json_decode($filtered, $assoc, $depth, $options);
    }

    return json_decode($filtered, $assoc, $depth);
  }
3569
3570
  /**
   * Returns the JSON representation of a value after passing it through "UTF8::filter()".
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   The value being encoded. Can be any type except a resource.
   * @param int   $options [optional] Bitmask of JSON_* encode options
   *                       (JSON_HEX_QUOT, JSON_HEX_TAG, JSON_PRETTY_PRINT, ...).
   * @param int   $depth   [optional] Maximum depth, must be greater than zero.
   *                       It is only handed to the native json_encode() when
   *                       "Bootup::is_php('5.5')" reports a truthy value.
   *
   * @return string|false The result of the native json_encode() call: a JSON
   *                      encoded string on success or false on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // The third parameter is only passed on when the version check succeeds.
    if (Bootup::is_php('5.5')) {
      return json_encode($filtered, $options, $depth);
    }

    return json_encode($filtered, $options);
  }
3618
3619
  /**
   * Makes the first character of a string lowercase.
   *
   * @param    string $str The input string
   *
   * @return   string The lowercased first character followed by the unchanged rest
   */
  public static function lcfirst($str)
  {
    $first = self::substr($str, 0, 1);
    $loweredFirst = self::strtolower($first);

    return $loweredFirst . self::substr($str, 1);
  }
3630
3631
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than the native "ltrim()" !!!!
   *
   * @param    string $str   The string to be trimmed; it is cast to string first
   * @param    string $chars Optional characters to be stripped. When left at the
   *                         default (INF) the regex class "\s" is stripped,
   *                         otherwise the class built by "UTF8::rxClass()"
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INF is the "no character list given" marker.
    if (INF === $chars) {
      $chars = '\s';
    } else {
      $chars = self::rxClass($chars);
    }

    return preg_replace("/^{$chars}+/u", '', $str);
  }
3653
3654
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *                      (an array is concatenated before it is examined).
   *
   * @return   string The character with the highest code point.
   */
  public static function max($arg)
  {
    $str = is_array($arg) ? implode($arg) : $arg;

    $highest = max(self::codepoints($str));

    return self::chr($highest);
  }
3669
3670
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest entry of "UTF8::chr_size_list($str)", or 0 if
   *               that list is empty.
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3687
3688
  /**
   * Tells whether the "mbstring" extension is loaded.
   *
   * Side effect: when the extension is available, its internal encoding is
   * switched to UTF-8.
   *
   * @return   bool True if the extension is available, false otherwise
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    mb_internal_encoding('UTF-8');

    return true;
  }
3703
3704
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *                      (an array is concatenated before it is examined).
   *
   * @return   string The character with the lowest code point.
   */
  public static function min($arg)
  {
    $str = is_array($arg) ? implode($arg) : $arg;

    $lowest = min(self::codepoints($str));

    return self::chr($lowest);
  }
3719
3720
  /**
   * Normalize the encoding-name input.
   *
   * The label is upper-cased and stripped of everything except letters,
   * digits and whitespace before it is looked up in a fixed alias table.
   *
   * @param string $encodingLabel e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string The mapped name (e.g.: ISO-8859-1, UTF-8, ISO-8859-5), or the
   *                unchanged $encodingLabel when the table has no entry for it.
   */
  public static function normalizeEncoding($encodingLabel)
  {
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'WINDOWS1251' => 'ISO-8859-5',
    );

    // Lookup key: upper-case label without punctuation ("utf-8" => "UTF8").
    $key = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));

    // Every table value is a non-empty string, so isset() is a sufficient test.
    return isset($aliases[$key]) ? $aliases[$key] : $encodingLabel;
  }
3755
3756
  /**
   * Normalize MS Word special characters.
   *
   * Every key of the "$utf8MSWord" table found in the string is replaced by
   * the corresponding table value.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // Search / replacement lists are derived from the table once per request.
    static $search = null;
    static $replace = null;

    if (null === $search) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replace, $str);
  }
3775
3776
  /**
   * Normalize the whitespace.
   *
   * Every character listed in the "$whitespaceTable" is replaced by a plain
   * space; the characters listed in the "$bidiUniCodeControlsTable" are
   * removed unless they are explicitly kept.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true (strictly), to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *                                        Only a strict false triggers the removal.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // Per-request caches of the replacement lists.
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // The cache slot has to be derived from the very same strict test that
    // decides whether NO-BREAK SPACE is dropped from the list. Deriving it
    // from an (int) cast let a truthy non-bool argument store the *full*
    // list in slot 1, which later calls with "true" then picked up.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($whitespaces[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3814
3815
  /**
   * Format a number with grouped thousands, with support for multi-byte separators.
   *
   * When one of the separators is longer than one byte, the number is first
   * formatted with the plain "." / "," separators, which are then swapped for
   * the requested ones. This does not depend on the PHP version, so it also
   * covers PHP releases whose native number_format() only honours the first
   * byte of a separator.
   *
   * @param float  $number        The number being formatted.
   * @param int    $decimals      Number of decimal digits.
   * @param string $dec_point     Separator for the decimal point.
   * @param string $thousands_sep Thousands separator.
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() with an array replaces both placeholders in a single pass.
      // Two sequential replacements would corrupt the result whenever
      // $dec_point itself contains a "," (e.g. "1,234.50" => "1,234,50" =>
      // every comma turned into the thousands separator).
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3845
3846
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * Only the first character of $s is looked at (at most its first four bytes).
   *
   * @param    string $s The character of which to calculate code point;
   *                     it is cast to string first.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for an empty input. A lead byte that is not followed by the
   *           expected number of bytes is returned as its plain byte value.
   */
  public static function ord($s)
  {
    $s = (string)$s;

    // Test for "no bytes at all" instead of falsiness: the string "0" is a
    // perfectly valid character (code point 48).
    if (!isset($s[0])) {
      return 0;
    }

    // 1-based list of the (up to four) leading byte values.
    $bytes = unpack('C*', substr($s, 0, 4));
    $a = $bytes ? $bytes[1] : 0;

    // 4 byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if (0xF0 <= $a && isset($bytes[4])) {
      return (($a - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // 3 byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
    if (0xE0 <= $a && isset($bytes[3])) {
      return (($a - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // 2 byte sequence: 110xxxxx 10xxxxxx
    if (0xC0 <= $a && isset($bytes[2])) {
      return (($a - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // Single byte (or a lead byte without enough followers).
    return $a;
  }
3877
3878
  /**
   * Parses the string into variables.
   *
   * WARNING: This differs from the one-argument form of the native parse_str():
   *    the results are always stored in $result and never placed in the local scope!
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     The input string; it is passed through
   *                        "UTF8::filter()" before it is parsed.
   * @param array  $result  Receives the parsed variables as array elements
   *                        (filled by mb_parse_str()).
   *
   * @return void
   */
  public static function parse_str($str, &$result)
  {
    self::checkForSupport();

    $filtered = self::filter($str);

    mb_parse_str($filtered, $result);
  }
3905 1
3906
  /**
   * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is usable.
   *
   * @return   bool True if an empty pattern with the "u" modifier matches, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // Any warning raised by the probe is silenced on purpose.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3916
3917
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each bound is turned into a code point: a string of decimal digits is
   * used as is, a string of hex digits goes through "UTF8::hex_to_int()" and
   * anything else through "UTF8::ord()". An empty array is returned when a
   * bound is falsy or resolves to a falsy code point.
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters from the start to the end code point.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // Resolve the start bound first, then the end bound.
    $bounds = array();
    foreach (array($var1, $var2) as $bound) {
      if (ctype_digit((string)$bound)) {
        $codePoint = (int)$bound;
      } elseif (ctype_xdigit($bound)) {
        $codePoint = (int)self::hex_to_int($bound);
      } else {
        $codePoint = self::ord($bound);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3963
3964
  /**
   * Remove a leading BOM (byte order mark) from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Besides the raw BOM bytes, the UTF-8 and UTF-16 BOMs are also recognised in their
   * mis-decoded form: BOM bytes that were read as "Windows-1252" and stored again as UTF-8.
   *
   * @param string $str
   *
   * @return string The input without its leading BOM, or the unchanged input if there is none.
   */
  public static function removeBOM($str = '')
  {
    // INFO: https://en.wikipedia.org/wiki/Byte_order_mark
    //
    // Every signature is written as explicit bytes, so the comparison neither depends on the
    // encoding of this source file nor can it degrade into an empty needle.
    // The order matters: the UTF-32 (LE) BOM starts with the bytes of the UTF-16 (LE) BOM
    // and therefore has to be tested first.
    static $boms = array(
        "\xef\xbb\xbf",             // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf", // UTF-8 BOM as "Windows-1252"
        "\x00\x00\xfe\xff",         // UTF-32 (BE) BOM
        "\xff\xfe\x00\x00",         // UTF-32 (LE) BOM
        "\xfe\xff",                 // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf",         // UTF-16 (BE) BOM as "Windows-1252"
        "\xff\xfe",                 // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe",         // UTF-16 (LE) BOM as "Windows-1252"
    );

    $str = (string)$str;

    foreach ($boms as $bom) {
      $bomLength = strlen($bom);

      if (strncmp($str, $bom, $bomLength) === 0) {
        // Only the first matching signature is stripped.
        // The cast keeps the result a string when nothing follows the BOM.
        return (string)substr($str, $bomLength);
      }
    }

    return $str;
  }
3995 1
3996 1
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what (e.g. "   ") is collapsed into a single $what.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String (or list of strings) to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        $item = (string)$item;

        // An empty needle has nothing to collapse.
        if ($item === '') {
          continue;
        }

        // A callback returns the needle verbatim; as a plain replacement string,
        // sequences like "$0" or "\1" would be expanded as back-references.
        $str = preg_replace_callback(
            '/(?:' . preg_quote($item, '/') . ')+/',
            function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
4018
4019
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the URL-encoded ("%XX") form of the control characters.
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Percent-encoded hex digits are case-insensitive ("%0B" equals "%0b"), hence the "i" flag.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Removing one sequence can join its neighbours into a new one (e.g. "%0%000"),
    // so repeat until a pass replaces nothing.
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4052 36
4053
  /**
   * Replace the "diamond question mark" (�) with a substitute.
   *
   * @param string $str     The string to process.
   * @param string $unknown The substitute for every replacement character that is found.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // one substitute per needle
    $substitutes = array(
        $unknown,
        $unknown,
    );

    return str_replace($needles, $substitutes, $str);
  }
4075
4076
  /**
   * Strip whitespace or other characters from the end of a UTF-8 string.
   *
   * WARNING: This is much slower than "rtrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; whitespace ("\s") when omitted
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $chars = INF === $chars ? '\s' : self::rxClass($chars);

    // "\z" matches only at the very end of the subject. A plain "$" also matches in front of
    // a final newline, so characters before that newline would be stripped as well.
    return preg_replace("/{$chars}+\\z/u", '', $str);
  }
4098
4099
  /**
   * Build a regular-expression fragment that matches any one of the given characters.
   *
   * The input is split with self::str_split(). Elements of up to two bytes are quoted and
   * added to a bracket expression, longer elements that count as one character are added
   * unquoted, and elements that count as more than one character become separate
   * alternatives of a non-capturing group.
   *
   * @param string $s     The characters to match.
   * @param string $class Regex class content that is put in front of the characters, e.g. "\pL".
   *
   * @return string "[...]" or "(?:[...]|...)"; the "/" delimiter is the one quoted for.
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    // Two-level cache: a concatenated key ($s . $class) cannot tell ('\pL', '') apart
    // from ('', '\pL'), although both produce different expressions.
    $classKey = (string)$class;
    $charsKey = (string)$s;

    if (isset($rxClassCache[$classKey][$charsKey])) {
      return $rxClassCache[$classKey][$charsKey];
    }

    // Index 0 collects the bracket expression, further entries are alternatives.
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen goes to the front of the bracket expression
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        // at most two bytes
        $parts[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    $parts[0] = '[' . $parts[0] . ']';

    if (1 === count($parts)) {
      $return = $parts[0];
    } else {
      $return = '(?:' . implode('|', $parts) . ')';
    }

    $rxClassCache[$classKey][$charsKey] = $return;

    return $return;
  }
4144 15
4145 1
  /**
   * Print every entry of self::$support on its own line, e.g. for debugging.
   *
   * Each value is followed by "\n<br>".
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportFlag) {
      echo $supportFlag, "\n<br>";
    }
  }
4154 16
4155 16
  /**
   * Converts a UTF-8 character to an HTML numbered entity like "&#123;".
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   *
   * @return   string The HTML numbered entity, or an empty string for empty input.
   */
  public static function single_chr_html_encode($chr)
  {
    $chr = (string)$chr;

    // Test for "no byte at all" rather than for falsiness: the character "0" is falsy
    // in PHP but still has to be encoded.
    if (!isset($chr[0])) {
      return '';
    }

    return '&#' . self::ord($chr) . ';';
  }
4170
4171
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *                              (only applied when PCRE UTF-8 support is available).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    self::checkForSupport();
    $ret = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and collect well-formed sequences of 1 to 4 bytes,
      // anything else is skipped
      $byteCount = strlen($str);

      for ($pos = 0; $pos < $byteCount; ++$pos) {
        $lead = $str[$pos];

        // 0xxxxxxx: single byte
        if (($lead & "\x80") === "\x00") {
          $ret[] = $lead;
          continue;
        }

        // 110xxxxx: lead byte + 1 continuation byte
        if ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $ret[] = $lead . $str[$pos + 1];
            ++$pos;
          }
          continue;
        }

        // 1110xxxx: lead byte + 2 continuation bytes
        if ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          if (
              (($str[$pos + 1] & "\xC0") === "\x80")
              &&
              (($str[$pos + 2] & "\xC0") === "\x80")
          ) {
            $ret[] = $lead . $str[$pos + 1] . $str[$pos + 2];
            $pos += 2;
          }
          continue;
        }

        // 11110xxx: lead byte + 3 continuation bytes
        if ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          if (
              (($str[$pos + 1] & "\xC0") === "\x80")
              &&
              (($str[$pos + 2] & "\xC0") === "\x80")
              &&
              (($str[$pos + 3] & "\xC0") === "\x80")
          ) {
            $ret[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];
            $pos += 3;
          }
        }
      }
    }

    // join the single characters into elements of up to $length characters
    if ($length > 1) {
      $ret = array_map('implode', array_chunk($ret, $length));
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4249
4250
  /**
   * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // Each detector is asked only once; its result (1 or 2) selects the byte order.
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    $detectOrder = array(
        'UTF-8',
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
    );

    self::checkForSupport();

    $encoding = mb_detect_encoding($str, $detectOrder, true);

    // A "UTF-8" answer is only trusted if self::is_utf8() confirms it.
    if ($encoding && ($encoding !== 'UTF-8' || self::is_utf8($str) === true)) {
      return $encoding;
    }

    //
    // 4.) check via "iconv()"
    //
    // An encoding is accepted if converting the string from that encoding
    // to itself leaves it unchanged (compared via md5).

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@iconv($encodingTmp, $encodingTmp, $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4327
4328
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s); always inserted literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed a string or an array of replacements.
   * @since 5.0
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every needle into a case-insensitive UTF-8 pattern.
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // an empty needle must not match anything: this pattern can never match
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // "preg_replace()" expands "$n" / "\n" in the replacement as back-references;
    // escape both characters so the replacement is inserted literally.
    $escapes = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      $replacements = array();
      foreach ($replace as $key => $value) {
        $replacements[$key] = strtr((string)$value, $escapes);
      }
    } else {
      $replacements = strtr((string)$replace, $escapes);
    }

    $replaced = 0;
    $subject = preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
4372
4373
  /**
   * Limit the number of characters in a string, cutting at a word boundary (a space) if possible.
   *
   * @param  string $str      The input string.
   * @param  int    $length   Maximum number of characters to keep.
   * @param  string $strAddOn Appended whenever the string was shortened.
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit ends right on a space: drop that space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $cut = self::substr($str, 0, $length);

    // drop the last (possibly truncated) word
    $words = explode(' ', $cut);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    // nothing left: fall back to a hard cut
    if ($withoutLastWord === '') {
      return self::substr($cut, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4411
4412
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if $pad_length is not an integer, is not positive,
   * is smaller than the length of the input, or if $pad_string is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || ($pad_length <= 0) || ($pad_length < $input_length)) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // Nothing to pad with; this also prevents a division by zero below.
    if ($ps_length < 1) {
      return $input;
    }

    // number of characters that have to be added
    $diff = $pad_length - $input_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // The left side gets the rounded-down half, the right side the rounded-up half.
        // The division has to happen before the integer cast.
        $halfRepeat = (int)ceil($diff / $ps_length / 2);
        $pre = self::substr(str_repeat($pad_string, $halfRepeat), 0, (int)floor($diff / 2));
        $post = self::substr(str_repeat($pad_string, $halfRepeat), 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $input . $post;
  }
4457
4458
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return \str_repeat(self::filter($input), $multiplier);
  }
4482
4483
  /**
   * INFO: this is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // delegate to the native function
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
4517
4518
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split with self::split(), so whole characters are moved, not single bytes.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
4533
4534
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice leaves every value only once
      $flipped = array_flip($codePoints);
      $codePoints = array_flip($flipped);
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4559
4560
  /**
   * Convert a string to an array of grapheme clusters.
   *
   * @param string $str The string to split.
   * @param int    $len Number of grapheme clusters per array element;
   *                    values below 1 are handed to the native "str_split()".
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    if ($len < 1) {
      return \str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $offset = 0;
      $byteLength = strlen($str);

      // "grapheme_extract()" writes the start of the next cluster back into $offset
      while ($offset < $byteLength) {
        $graphemes[] = grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $offset, $offset);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $graphemes = $matches[0];
    }

    if ($len === 1) {
      return $graphemes;
    }

    // join every $len consecutive clusters into one element
    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if (0 === $position % $len) {
        // start of a new chunk
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
4608
4609
  /**
   * Get a binary representation of a specific character.
   *
   * Every byte of the input is rendered via the "%08b" format and the results are concatenated.
   *
   * @param   string $str The input character.
   *
   * @return  string
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $bits = array();

    // the native "str_split()" yields the single bytes
    foreach (\str_split($str) as $byte) {
      $bits[] = vsprintf('%08b', (array)self::ord($byte));
    }

    return implode('', $bits);
  }
4635
4636
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   * Be aware it works by making a copy of the input string which it appends transliterated
   * characters to - it uses a PHP output buffer to do this - it means, memory use will increase,
   * requiring up to the same amount again as the input string
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @author <[email protected]>
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    // Lookup tables, indexed by "bank" (code point >> 8); filled lazily from the data files.
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Decode the code point from the lead byte and its continuation bytes.
      // $ord is reset for every character, so a value from the previous character cannot
      // leak into this one, and bytes are only read if the element really has them.
      $byteCount = strlen($c);
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223 && $byteCount >= 2) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239 && $byteCount >= 3) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247 && $byteCount >= 4) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251 && $byteCount >= 5) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253 && $byteCount >= 6) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // Lead bytes 254/255, stray continuation bytes and truncated sequences
      // have no code point.
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the data file presumably assigns $UTF8_TO_ASCII[$bank] in this scope - confirm.
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        // Missing file, or a file that did not define the bank: remember it as empty.
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference that the loop left on the last element
    unset($c);

    return implode('', $chars);
  }
4746
4747
  /**
   * Counts the words in a UTF-8 string, or returns the words themselves.
   *
   * The string is split on runs of the character class returned by
   * rxClass($charlist, '\pL'); inside a word, such runs may be joined by one
   * dash punctuation character (\p{Pd}), a "’" or a "'".
   *
   * @param string $str      The input string.
   * @param int    $format   0 (default): return the number of words<br />
   *                         1: return a list of the words<br />
   *                         2: return the words, keyed by their character offset in $str
   * @param string $charlist Passed to rxClass() together with '\pL' to build the word-character class.
   *
   * @return array|int The number of words (format 0) or the words found (format 1 and 2).
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $strParts = preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // preg_split() returns false on failure (e.g. malformed UTF-8 under the "u" modifier);
    // report "no words" instead of calling count() on a boolean
    if ($strParts === false) {
      return ($format == 1 || $format == 2) ? array() : 0;
    }

    // with PREG_SPLIT_DELIM_CAPTURE the words sit at the odd indexes,
    // the text between them at the even indexes
    $len = count($strParts);

    if ($format == 1) {

      $numberOfWords = array();
      for ($i = 1; $i < $len; $i += 2) {
        $numberOfWords[] = $strParts[$i];
      }

    } elseif ($format == 2) {

      self::checkForSupport();

      $numberOfWords = array();
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $numberOfWords[$offset] = $strParts[$i];
        // advance past the word and the separator that follows it
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

    } else {

      // $len is always odd here, so the division is exact
      $numberOfWords = (int)(($len - 1) / 2);

    }

    return $numberOfWords;
  }
4789
4790 8
  /**
   * Compares two strings without regard to case.
   *
   * Both operands are case-folded with strtocasefold() and the folded
   * strings are then compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $foldedFirst = self::strtocasefold($str1);
    $foldedSecond = self::strtocasefold($str2);

    return self::strcmp($foldedFirst, $foldedSecond);
  }
4802
4803
  /**
   * Compares two strings after NFD normalization.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    // identical strings are equal without any normalization work
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $first = Normalizer::normalize($str1, Normalizer::NFD);
    $second = Normalizer::normalize($str2, Normalizer::NFD);

    return strcmp($first, $second);
  }
4820
4821
  /**
   * Returns the length (in characters) of the initial part of a string that
   * contains none of the characters in $charlist.
   *
   * @param string $str
   * @param string $charlist Characters that end the segment; an empty list yields null.
   * @param int    $start    Start position handed to self::substr().
   * @param int    $len      Length handed to self::substr().
   *
   * @return int|null Null if $charlist is empty, otherwise the segment length.
   */
  public static function strcspn($str, $charlist, $start = 0, $len = 2147483647)
  {
    $charlist .= '';
    if ($charlist === '') {
      return null;
    }

    // only cut the string when a window was actually requested
    $windowRequested = $start || 2147483647 !== $len;
    $str = (string)($windowRequested ? self::substr($str, $start, $len) : $str);

    $found = array();
    if (!preg_match('/^(.*?)' . self::rxClass($charlist) . '/us', $str, $found)) {
      // no character of the list occurs: the whole string is the segment
      return self::strlen($str);
    }

    return self::strlen($found[1]);
  }
4850 2
4851
  /**
   * Builds a string by mapping every element of $array through UTF8::chr()
   * and joining the results.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $toCharacter = array('\\voku\\helper\\UTF8', 'chr');
    $characters = array_map($toCharacter, $array);

    return implode($characters);
  }
4870 1
4871
  /**
   * Checks whether the first three bytes of a string are recognised by is_bom().
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $leadingBytes = substr($str, 0, 3);

    return self::is_bom($leadingBytes);
  }
4882
4883
  /**
   * Strip HTML and PHP tags from a string, after passing it through self::clean().
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags which should not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are always stripped; this can not
   *                               be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // repair broken UTF-8 before the tags are removed
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
4909
4910
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack to start searching;
   *                           null (the default) is treated as 0
   *                           </p>
   * @param string  $encoding  Character encoding handed to mb_stripos(); a boolean is replaced by 'UTF-8'
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack string, or false if needle is not found or either
   *                   string is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    // the default offset is null, but mb_stripos() expects an integer
    // (passing null is deprecated since PHP 8.1); null becomes 0, the native default
    return mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4957
4958
  /**
   * Case-insensitive search for $needle in $str via mb_stristr() (UTF-8).
   *
   * @param string $str
   * @param string $needle        An empty needle yields false.
   * @param bool   $before_needle Passed through to mb_stristr().
   *
   * @return false|string
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle .= '';
    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    $portion = mb_stristr($str, $needle, $before_needle, 'UTF-8');

    return $portion;
  }
4978
4979
  /**
   * Get the string length in characters, not in bytes.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Charset handed to mb_strlen(); a boolean is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied for 'UTF-8').
   *
   * @return int The number of characters in $str for the given encoding;
   *             a multi-byte character is counted as 1. Empty input gives 0.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return mb_strlen($str, $encoding);
  }
5015
5016
  /**
   * Case-insensitive "natural order" comparison.
   *
   * Both operands are case-folded with strtocasefold() and then compared
   * with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2, > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $foldedFirst = self::strtocasefold($str1);
    $foldedSecond = self::strtocasefold($str2);

    return self::strnatcmp($foldedFirst, $foldedSecond);
  }
5029
5030
  /**
   * "Natural order" comparison of two strings.
   *
   * Unless the strings are identical, both are passed through strtonatfold()
   * before being compared with PHP's strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $first = self::strtonatfold($str1);
    $second = self::strtonatfold($str2);

    return strnatcmp($first, $second);
  }
5043
5044
  /**
   * Case-insensitive comparison of the first $len characters.
   *
   * Both operands are case-folded with strtocasefold() and then compared
   * with self::strncmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $foldedFirst = self::strtocasefold($str1);
    $foldedSecond = self::strtocasefold($str2);

    return self::strncmp($foldedFirst, $foldedSecond, $len);
  }
5057
5058
  /**
   * Compares the first $len characters of two strings.
   *
   * Each operand is shortened with self::substr($str, 0, $len) and the two
   * prefixes are compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   * @param int    $len
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    $firstPrefix = self::substr($str1, 0, $len);
    $secondPrefix = self::substr($str2, 0, $len);

    return self::strcmp($firstPrefix, $secondPrefix);
  }
5073
5074
  /**
   * Search a string for any of a set of characters.
   *
   * @param string $s
   * @param string $charList
   *
   * @return string|false The rest of $s starting at the first matching character,
   *                      or false if none of the characters occurs.
   */
  public static function strpbrk($s, $charList)
  {
    $hit = array();
    if (!preg_match('/' . self::rxClass($charList) . '/us', $s, $hit)) {
      return false;
    }

    // byte-wise functions on purpose: position and cut use the same unit
    $bytePos = strpos($s, $hit[0]);

    return substr($s, $bytePos);
  }
5090
5091
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack     <p>
   *                              The string being checked.
   *                              </p>
   * @param string  $needle       <p>
   *                              The string to find in haystack. Non-string values
   *                              (including integers) are cast to string.
   *                              </p>
   * @param int     $offset       [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string  $encoding     Character encoding for mb_strpos(); a boolean is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found, or either string is empty, it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    // NOTE: a former "(int)$needle === $needle" branch that converted integer
    // needles via self::chr() was removed here: $needle is always a string at
    // this point, so that branch could never run.

    if ($cleanUtf8 === true) {
      // mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): the 'iconv' flag guards a grapheme_* call - confirm this is the intended flag
    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with older PHP releases (before 5.5.35 / 5.6.21 / 7.0.6)
      return grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback: byte-wise search, then convert the byte position to characters

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5167
5168
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         If true, the part of haystack from the beginning to
   *                         the last occurrence of needle is returned.
   *                         If false, the part from the last occurrence of
   *                         needle to the end is returned.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to 'UTF-8'.
   *                         </p>
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5202 4
5203
  /**
   * Reverses characters order in the string.
   *
   * The string is broken up with self::split(), the pieces are reversed and
   * joined again.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $characters = self::split($str);
    $reversed = array_reverse($characters);

    return implode($reversed);
  }
5214
5215
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         If true, the part of haystack from the beginning to
   *                         the last occurrence of needle is returned.
   *                         If false, the part from the last occurrence of
   *                         needle to the end is returned.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to 'UTF-8'.
   *                         </p>
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5249 1
5250
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Both strings are lower-cased with self::strtolower() and the search is
   * delegated to self::strrpos().
   *
   * @param    string $haystack The string to look in
   * @param    string $needle   The string to look for
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return   int|false The position found by self::strrpos(), or false if there is no match
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
5263
5264
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string  $haystack     <p>
   *                              The string being checked, for the last occurrence
   *                              of needle
   *                              </p>
   * @param string  $needle       <p>
   *                              The string to find in haystack. Non-string values
   *                              (including integers) are cast to string.
   *                              </p>
   * @param int     $offset       [optional] May be specified to begin searching an arbitrary number of characters into
   *                              the string. Negative values will stop searching at an arbitrary point
   *                              prior to the end of the string. Null (the default) is treated as 0.
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle in the
   *                   haystack string. If needle is not found, or either string is
   *                   empty, it returns false.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // NOTE: a former "(int)$needle === $needle" branch that converted integer
    // needles via self::chr() was removed here: $needle is always a string at
    // this point, so that branch could never run.

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): the 'iconv' flag guards a grapheme_* call - confirm this is the intended flag
    if (self::$support['iconv'] === true) {
      return grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: byte-wise search, then convert the byte position to characters

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    if (($pos = strrpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // a negative offset only shortened the haystack above, so it adds nothing to the position
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5337
5338
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $s
   * @param string $mask
   * @param int    $start Start position handed to self::substr().
   * @param int    $len   Length handed to self::substr().
   *
   * @return int Length of the matching segment in characters, 0 if there is none.
   */
  public static function strspn($s, $mask, $start = 0, $len = 2147483647)
  {
    // only cut the string when a window was actually requested
    if ($start || 2147483647 !== $len) {
      $s = self::substr($s, $start, $len);
    }

    $run = array();
    if (preg_match('/^' . self::rxClass($mask) . '+/u', $s, $run)) {
      return self::strlen($run[0]);
    }

    return 0;
  }
5357
5358
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      <p>
   *                              The input string. Must be valid UTF-8.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to look for. Must be valid UTF-8.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack before the first
   *                              occurrence of the needle (excluding the needle) is returned.
   *                              </p>
   *
   * @return string|false The portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5382
5383 10
  /**
   * Unicode transformation for case-less matching.
   *
   * Applies the replacements from self::$commonCaseFold, optionally those of
   * the 'caseFolding_full' data set, then cleans and lower-cases the result.
   * The lookup tables are built once and cached in static variables.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str
   * @param bool   $full Also apply the 'caseFolding_full' replacements.
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    static $commonFrom = null;
    static $commonTo = null;
    static $fullTable = null;

    if (null === $commonFrom) {
      $commonFrom = array_keys(self::$commonCaseFold);
      $commonTo = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonFrom, $commonTo, $str);

    if ($full) {

      if (null === $fullTable) {
        $fullTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search strings, index 1 their replacements
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    return self::strtolower(self::clean($folded));
  }
5420
5421
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str <p>
   *                    The string being lowercased.
   *                    </p>
   * @param string $encoding Character encoding handed to mb_strtolower().
   *
   * @return string str with all alphabetic characters converted to lowercase;
   *                an empty string for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $lowered = mb_strtolower($str, $encoding);

    return $lowered;
  }
5447 3
5448
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is NFD-normalized and every run of non-spacing marks (\p{Mn})
   * is then removed.
   *
   * @param string $s
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = Normalizer::normalize($s, Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5459
5460
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when self::$support['mbstring'] is true; otherwise
   * the string is cleaned and converted with the table from self::case_table(),
   * which is built once and cached in static variables.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str <p>
   *                    The string being uppercased.
   *                    </p>
   * @param string $encoding Character encoding handed to mb_strtoupper().
   *
   * @return string str with all alphabetic characters converted to uppercase;
   *                an empty string for empty input.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    static $lowerChars = null;
    static $upperChars = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return mb_strtoupper($str, $encoding);
    }

    // fallback

    if (null === $lowerChars) {
      $table = self::case_table();
      $lowerChars = array_keys($table);
      $upperChars = array_values($table);
    }

    return str_replace($lowerChars, $upperChars, self::clean($str));
  }
5503 1
5504
  /**
   * Translate characters or replace sub-strings.
   *
   * With two arguments, $from must be an array of "search => replacement"
   * pairs and is handed to PHP's strtr() unchanged. With three arguments,
   * $from and $to are paired element by element: strings are first split
   * with self::str_split(), arrays are used as ready-made lists. The longer
   * list is truncated to the length of the shorter one.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  <p>
   *                           The string being translated.
   *                           </p>
   * @param string|array $from <p>
   *                           The string (or list) replacing from, or the
   *                           pairs array when $to is omitted.
   *                           </p>
   * @param string|array $to   <p>
   *                           The string (or list) being translated to.
   *                           </p>
   *
   * @return string A copy of str with all occurrences of each element of
   *                from translated to the corresponding element of to.
   * @since 4.0
   * @since 5.0
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // str_split() only handles strings; arrays already are lists of elements
      $from = is_array($from) ? array_values($from) : self::str_split($from);
      $to = is_array($to) ? array_values($to) : self::str_split($to);
      $countFrom = count($from);
      $countTo = count($to);

      // array_combine() needs both lists to have the same length
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    return strtr($str, $from);
  }
5545
5546
  /**
5547
   * Return the width of a string.
5548
   *
5549
   * @param string $s
5550
   *
5551
   * @return int
5552
   */
5553
  public static function strwidth($s)
  {
    // Same input handling as the sibling methods: work on a string and
    // short-circuit on empty input (this also keeps null away from mb_strwidth()).
    $s = (string)$s;

    if (!isset($s[0])) {
      return 0;
    }

    // init
    self::checkForSupport();

    return mb_strwidth($s, 'UTF-8');
  }
5560 39
5561
  /**
5562 39
   * Get part of a string.
5563 9
   *
5564
   * @link http://php.net/manual/en/function.mb-substr.php
5565
   *
5566
   * @param string  $str       <p>
5567 37
   *                           The string being checked.
5568
   *                           </p>
5569 37
   * @param int     $start     <p>
5570
   *                           The first position used in str.
5571
   *                           </p>
5572
   * @param int     $length    [optional] <p>
5573 1
   *                           The maximum length of the returned string.
5574 1
   *                           </p>
5575
   * @param string  $encoding
5576 37
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
5577 22
   *
5578 22
   * @return string mb_substr returns the portion of
5579 33
   * str specified by the start and length parameters.
5580
   */
5581
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // nothing can be cut out of an empty string
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    // iconv and mbstring are not tolerant to invalid encoding and behave
    // differently from PHP's substr() on it, so the input can be cleaned on request
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // without an explicit length the length of the whole string is used
    $length = $length === null ? (int)self::strlen($str) : (int)$length;

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions (boolean in the encoding position)
      if (is_bool($encoding)) {
        $encoding = 'UTF-8';
      }

      return mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return (string)grapheme_substr($str, $start, $length);
    }

    // fallback: split into an array (removing invalid characters),
    // extract the relevant part and join it to a string again
    return implode(array_slice(self::split($str), $start, $length));
  }
5627
5628
  /**
5629
   * Binary safe comparison of two strings from an offset, up to length characters.
5630
   *
5631
   * @param string  $main_str           The main string being compared.
5632
   * @param string  $str                The secondary string being compared.
5633
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
5634
   *                                    end of the string.
5635
   * @param int     $length             The length of the comparison. The default value is the largest of the length of
5636
   *                                    the str compared to the length of main_str less the offset.
5637
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
5638
   *
5639
   * @return int
5640
   */
5641
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // cut the compared window out of the main string, then shorten the
    // secondary string to the same number of characters
    $window = self::substr($main_str, $offset, $length);
    $candidate = self::substr($str, 0, self::strlen($window));

    if ($case_insensitivity === true) {
      return self::strcasecmp($window, $candidate);
    }

    return self::strcmp($window, $candidate);
  }
5648
5649
  /**
5650
   * Count the number of substring occurrences
5651
   *
5652
   * @link  http://php.net/manual/en/function.substr-count.php
5653
   *
5654
   * @param string $haystack <p>
5655
   *                         The string to search in
5656
   *                         </p>
5657
   * @param string $needle   <p>
5658
   *                         The substring to search for
5659
   *                         </p>
5660
   * @param int    $offset   [optional] <p>
5661
   *                         The offset where to start counting
5662
   *                         </p>
5663
   * @param int    $length   [optional] <p>
5664
   *                         The maximum length after the specified offset to search for the
5665 6
   *                         substring. It outputs a warning if the offset plus the length is
5666
   *                         greater than the haystack length.
5667
   *                         </p>
5668 6
   *
5669 1
   * @return int This function returns an integer.
5670
   * @since 4.0
5671
   * @since 5.0
5672 1
   */
5673 1
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return 0;
    }

    // Narrow the haystack only if the caller asked for a window. $length is
    // compared against null explicitly: an omitted length means "up to the end
    // of the haystack" and must not be cast to 0, which used to empty the
    // haystack and made every offset-only call return 0.
    if ($offset || $length !== null) {
      $haystack = self::substr(
          $haystack,
          (int)$offset,
          $length === null ? null : (int)$length
      );
    }

    self::checkForSupport();

    return mb_substr_count($haystack, $needle);
  }
5693 1
5694 1
  /**
5695 1
   * Replace text within a portion of a string.
5696 1
   *
5697 1
   * source: https://gist.github.com/stemar/8287074
5698
   *
5699
   * @param string|array $str
5700 1
   * @param string|array $replacement
5701 1
   * @param int          $start
5702 1
   * @param null|int     $length
5703 1
   *
5704
   * @return array|string
5705
   */
5706
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    // Array input: normalize every argument to a list with one entry per input
    // string and process the strings one by one.
    if (is_array($str)) {
      $num = count($str);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start (entries that are not integers fall back to 0)
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      // NOTE(review): an omitted length becomes 0 (pure insertion) for array input,
      // while the string branch below replaces up to the end - confirm this is intended.
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // String input: of an array of replacements only the first one is used.
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both strings into lists of UTF-8 characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    // No length given: replace everything from $start on. The number of matched
    // characters is used, so it always fits the list that gets spliced.
    if ($length === null) {
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // separator first: the legacy argument order implode($array, $glue) is gone in PHP 8
    return implode('', $smatches[0]);
  }
5771
5772
  /**
5773
   * Returns a case swapped version of the string.
5774
   *
5775 6
   * @param string $str
5776
   * @param string $encoding
5777 6
   *
5778
   * @return string each character's case swapped
5779
   */
5780
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // every non-whitespace character of the cleaned string is flipped on its own
    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          // uppercasing changed the character: use the uppercase form
          if ($char != $upper) {
            return $upper;
          }

          // otherwise hand back the lowercase form
          return UTF8::strtolower($char, $encoding);
        },
        self::clean($str)
    );
  }
5806
5807
  /**
5808
   * alias for "UTF8::to_ascii()"
5809
   *
5810
   * @param string $s The input string e.g. a UTF-8 String
5811
   * @param string $subst_chr
5812 7
   *
5813
   * @return string
5814 7
   */
5815
  public static function toAscii($s, $subst_chr = '?')
  {
    // pure alias: both arguments are forwarded untouched
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
5819 2
5820
  /**
5821
   * alias for "UTF8::to_latin1()"
5822 6
   *
5823
   * @param $str
5824 6
   *
5825 3
   * @return string
5826
   */
5827 3
  public static function toLatin1($str)
  {
    // pure alias: the argument is forwarded untouched
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
5831
5832 3
  /**
5833
   * alias for "UTF8::to_utf8"
5834 3
   *
5835 3
   * @param string $str
5836
   *
5837
   * @return string
5838 3
   */
5839 3
  public static function toUTF8($str)
  {
    // pure alias: the argument is forwarded untouched
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
5843
5844
  /**
5845
   * convert to ASCII
5846
   *
5847
   * @param string $s The input string e.g. a UTF-8 String
5848
   * @param string $subst_chr
5849
   *
5850
   * @return string
5851
   */
5852 3
  public static function to_ascii($s, $subst_chr = '?')
  {
    // extra transliteration table, loaded once on first use
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // strings without any byte >= 0x80 are ASCII already
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = Normalizer::normalize($s, Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // Split into single characters. The "s" modifier lets "." match line breaks
    // too; without it every "\n" was missing from the list and thereby from the result.
    preg_match_all('/./us', $s, $chars);

    /** @noinspection AlterInForeachInspection */
    foreach ($chars[0] as &$c) {

      // single-byte characters are kept as they are
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if ($t !== false && is_string($t)) {
          if (!isset($t[0])) {
            $t = '?';
          } elseif (isset($t[1])) {
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      // a failed iconv() call is handled like an unknown character,
      // so that the fallbacks below still get their chance
      if ($t === false) {
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          // decompose the character and keep its first byte if that is ASCII
          $t = Normalizer::normalize($c, Normalizer::NFD);

          if (is_string($t) && isset($t[0]) && $t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // drop the reference, so later writes to $c cannot touch the last list entry
    unset($c);

    return implode('', $chars[0]);
  }
5923
5924
  /**
5925
   * alias for "UTF8::to_win1252()"
5926
   *
5927
   * @param   string $str
5928
   *
5929 20
   * @return  array|string
5930
   */
5931 20
  public static function to_iso8859($str)
  {
    // pure alias: the argument is forwarded untouched
    $converted = self::to_win1252($str);

    return $converted;
  }
5935 2
5936
  /**
5937 2
   * alias for "UTF8::to_win1252()"
5938
   *
5939
   * @param string|array $str
5940 20
   *
5941
   * @return string|array
5942 20
   */
5943 9
  public static function to_latin1($str)
  {
    // pure alias: the argument is forwarded untouched
    $converted = self::to_win1252($str);

    return $converted;
  }
5947
5948 20
  /**
5949
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
5950 20
   *
5951 20
   * - It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
5952
   *
5953 20
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
5954 20
   *
5955 20
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
5956 20
   *    are followed by any of these:  ("group B")
5957
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
5958 20
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
5959
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
5960 18
   * is also a valid unicode character, and will be left unchanged.
5961 17
   *
5962 17
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
5963 17
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
5964 5
   *
5965 5
   * @param string|array $str Any string or array.
5966 5
   *
5967
   * @return string|array The same string (or array of strings), but UTF8 encoded.
5968
   */
5969 20
  public static function to_utf8($str)
  {
    // arrays are converted element by element (recursively), keeping the keys
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    // length in bytes: the string is walked byte by byte
    $max = self::strlen($str, '8bit');

    $buf = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 0x00-0x7F: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      // 0x80-0xBF outside of a sequence: needs conversion
      if ($ordC1 < 0xC0) {
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          // encode the single byte as a 2 byte sequence
          $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
        }
        continue;
      }

      // 0xC0-0xFF: should be converted to UTF8, if it's not UTF8 already
      if ($ordC1 <= 0xDF) {
        $trailing = 1; // looks like 2 bytes UTF8
      } elseif ($ordC1 <= 0xEF) {
        $trailing = 2; // looks like 3 bytes UTF8
      } elseif ($ordC1 <= 0xF7) {
        $trailing = 3; // looks like 4 bytes UTF8
      } else {
        $trailing = 0; // doesn't look like UTF8, but should be converted
      }

      // the lead byte is accepted only if all expected continuation bytes
      // (0x80-0xBF) follow; a sequence running past the end of the string is not accepted
      $sequence = $c1;
      $isUtf8 = $trailing > 0 && $i + $trailing < $max;
      for ($j = 1; $isUtf8 && $j <= $trailing; $j++) {
        $next = $str[$i + $j];
        if ((ord($next) & 0xC0) === 0x80) {
          $sequence .= $next;
        } else {
          $isUtf8 = false;
        }
      }

      if ($isUtf8) { // yeah, almost sure it's UTF8 already
        $buf .= $sequence;
        $i += $trailing;
      } else { // not valid UTF8 - convert the lead byte on its own
        // integer shift instead of "chr(ord() / 64)", which passed a float to chr()
        $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    // NOTE(review): the "HTML-ENTITIES" pseudo-encoding of mbstring is deprecated in newer PHP versions.
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6075 26
6076
  /**
6077 26
   * Convert a string into win1252.
6078 5
   *
6079
   * @param  string|array $str
6080
   *
6081
   * @return string|array
6082 22
   */
6083 6
  protected static function to_win1252($str)
  {
    // plain strings are decoded directly
    if (is_string($str)) {
      return self::utf8_decode($str);
    }

    // everything that is neither a string nor an array is handed back untouched
    if (!is_array($str)) {
      return $str;
    }

    // arrays are converted element by element (recursively), keeping the keys
    $converted = array();
    foreach ($str as $key => $value) {
      $converted[$key] = self::to_win1252($value);
    }

    return $converted;
  }
6099
6100
  /**
6101
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
6102
   *
6103
   * INFO: This is slower than "trim()"
6104
   *
6105
   * But we can only use the original-function, if we use <= 7-Bit in the string / chars
6106
   * but the check for ASCII (7-Bit) costs more time than we can save here.
6107
   *
6108
   * @param    string $str   The string to be trimmed
6109
   * @param    string $chars Optional characters to be stripped
6110
   *
6111
   * @return   string The trimmed string
6112
   */
6113
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // No character list given: strip separators (\pZ) and control/other characters (\pC).
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP, but it is a valid character to strip - so it must not
    // be mistaken for "no character list given".
    if ($chars === INF || ($chars !== '0' && $chars !== 0 && !$chars)) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6128 7
6129 7
  /**
6130
   * Makes string's first char uppercase.
6131 7
   *
6132 1
   * @param    string $str The input string
6133 1
   *
6134 7
   * @return   string The resulting string
6135
   */
6136
  public static function ucfirst($str)
  {
    // uppercase the first character, then append everything after it unchanged
    $head = self::strtoupper(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
6140
6141
  /**
6142
   * alias for "UTF8::ucfirst"
6143 1
   *
6144 1
   * @param $str
6145 1
   *
6146 7
   * @return string
6147 7
   */
6148 7
  public static function ucword($str)
  {
    // pure alias: the argument is forwarded untouched
    $result = self::ucfirst($str);

    return $result;
  }
6152 7
6153
  /**
6154
   * Uppercase for all words in the string.
6155
   *
6156
   * @param  string $str
6157
   * @param array   $exceptions
6158
   *
6159
   * @return string
6160
   */
6161
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // strict emptiness check: "0" is a valid one-word string and must not be turned into ''
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;

    // words are separated by single spaces; words listed in $exceptions stay as they are
    $newwords = array();
    foreach (explode(' ', $str) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }
      $newwords[] = $word;
    }

    // the result as a whole always gets an uppercase first character,
    // even if the first word is one of the exceptions
    return self::ucfirst(implode(' ', $newwords));
  }
6194
6195
  /**
6196
   * Multi decode html entity & fix urlencoded-win1252-chars.
6197
   *
6198
   * e.g:
6199
   * 'D&#252;sseldorf'               => 'Düsseldorf'
6200
   * 'D%FCsseldorf'                  => 'Düsseldorf'
6201
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
6202
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
6203
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
6204
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
6205
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
6206
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
6207
   *
6208
   * @param string $str
6209
   *
6210
   * @return string
6211
   */
6212
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // url-decode once, then rewrite "%uXXXX" escapes as hexadecimal html entities
    $decoded = urldecode($str);
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $decoded);

    // ENT_HTML5 is only used if Bootup reports PHP 5.4
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // convert to UTF-8, decode the html entities, raw-url-decode the result
    // and finally repair simple broken UTF-8
    $decoded = self::to_utf8($decoded);
    $decoded = self::html_entity_decode($decoded, $flags);
    $decoded = rawurldecode($decoded);
    $decoded = self::fix_simple_utf8($decoded);

    return (string)$decoded;
  }
6235
6236
  /**
6237
   * Return an array with "urlencoded"-win1252 -> UTF-8
6238
   *
6239
   * @return mixed
6240
   */
6241
  protected static function urldecode_fix_win1252_chars()
  {
    // Lookup table: url-encoded Windows-1252 byte ("%XX") => UTF-8 character,
    // for the bytes 0x20-0xFF, laid out with eight entries per row.
    //
    // - "%80" is the euro sign in Windows-1252 (it was mapped to a backtick before).
    // - Invisible characters (DEL, no-break space, soft hyphen) are written as
    //   escape sequences, so they cannot get lost in editors or copies.
    // - 0x81, 0x8D, 0x8F, 0x90 and 0x9D are not assigned in Windows-1252 and map to ''.
    //   NOTE(review): confirm against the original file that these five entries are really empty.
    static $array = array(
        '%20' => ' ', '%21' => '!', '%22' => '"', '%23' => '#', '%24' => '$', '%25' => '%', '%26' => '&', '%27' => "'",
        '%28' => '(', '%29' => ')', '%2A' => '*', '%2B' => '+', '%2C' => ',', '%2D' => '-', '%2E' => '.', '%2F' => '/',
        '%30' => '0', '%31' => '1', '%32' => '2', '%33' => '3', '%34' => '4', '%35' => '5', '%36' => '6', '%37' => '7',
        '%38' => '8', '%39' => '9', '%3A' => ':', '%3B' => ';', '%3C' => '<', '%3D' => '=', '%3E' => '>', '%3F' => '?',
        '%40' => '@', '%41' => 'A', '%42' => 'B', '%43' => 'C', '%44' => 'D', '%45' => 'E', '%46' => 'F', '%47' => 'G',
        '%48' => 'H', '%49' => 'I', '%4A' => 'J', '%4B' => 'K', '%4C' => 'L', '%4D' => 'M', '%4E' => 'N', '%4F' => 'O',
        '%50' => 'P', '%51' => 'Q', '%52' => 'R', '%53' => 'S', '%54' => 'T', '%55' => 'U', '%56' => 'V', '%57' => 'W',
        '%58' => 'X', '%59' => 'Y', '%5A' => 'Z', '%5B' => '[', '%5C' => '\\', '%5D' => ']', '%5E' => '^', '%5F' => '_',
        '%60' => '`', '%61' => 'a', '%62' => 'b', '%63' => 'c', '%64' => 'd', '%65' => 'e', '%66' => 'f', '%67' => 'g',
        '%68' => 'h', '%69' => 'i', '%6A' => 'j', '%6B' => 'k', '%6C' => 'l', '%6D' => 'm', '%6E' => 'n', '%6F' => 'o',
        '%70' => 'p', '%71' => 'q', '%72' => 'r', '%73' => 's', '%74' => 't', '%75' => 'u', '%76' => 'v', '%77' => 'w',
        '%78' => 'x', '%79' => 'y', '%7A' => 'z', '%7B' => '{', '%7C' => '|', '%7D' => '}', '%7E' => '~', '%7F' => "\x7f",
        '%80' => '€', '%81' => '', '%82' => '‚', '%83' => 'ƒ', '%84' => '„', '%85' => '…', '%86' => '†', '%87' => '‡',
        '%88' => 'ˆ', '%89' => '‰', '%8A' => 'Š', '%8B' => '‹', '%8C' => 'Œ', '%8D' => '', '%8E' => 'Ž', '%8F' => '',
        '%90' => '', '%91' => '‘', '%92' => '’', '%93' => '“', '%94' => '”', '%95' => '•', '%96' => '–', '%97' => '—',
        '%98' => '˜', '%99' => '™', '%9A' => 'š', '%9B' => '›', '%9C' => 'œ', '%9D' => '', '%9E' => 'ž', '%9F' => 'Ÿ',
        '%A0' => "\xc2\xa0", '%A1' => '¡', '%A2' => '¢', '%A3' => '£', '%A4' => '¤', '%A5' => '¥', '%A6' => '¦', '%A7' => '§',
        '%A8' => '¨', '%A9' => '©', '%AA' => 'ª', '%AB' => '«', '%AC' => '¬', '%AD' => "\xc2\xad", '%AE' => '®', '%AF' => '¯',
        '%B0' => '°', '%B1' => '±', '%B2' => '²', '%B3' => '³', '%B4' => '´', '%B5' => 'µ', '%B6' => '¶', '%B7' => '·',
        '%B8' => '¸', '%B9' => '¹', '%BA' => 'º', '%BB' => '»', '%BC' => '¼', '%BD' => '½', '%BE' => '¾', '%BF' => '¿',
        '%C0' => 'À', '%C1' => 'Á', '%C2' => 'Â', '%C3' => 'Ã', '%C4' => 'Ä', '%C5' => 'Å', '%C6' => 'Æ', '%C7' => 'Ç',
        '%C8' => 'È', '%C9' => 'É', '%CA' => 'Ê', '%CB' => 'Ë', '%CC' => 'Ì', '%CD' => 'Í', '%CE' => 'Î', '%CF' => 'Ï',
        '%D0' => 'Ð', '%D1' => 'Ñ', '%D2' => 'Ò', '%D3' => 'Ó', '%D4' => 'Ô', '%D5' => 'Õ', '%D6' => 'Ö', '%D7' => '×',
        '%D8' => 'Ø', '%D9' => 'Ù', '%DA' => 'Ú', '%DB' => 'Û', '%DC' => 'Ü', '%DD' => 'Ý', '%DE' => 'Þ', '%DF' => 'ß',
        '%E0' => 'à', '%E1' => 'á', '%E2' => 'â', '%E3' => 'ã', '%E4' => 'ä', '%E5' => 'å', '%E6' => 'æ', '%E7' => 'ç',
        '%E8' => 'è', '%E9' => 'é', '%EA' => 'ê', '%EB' => 'ë', '%EC' => 'ì', '%ED' => 'í', '%EE' => 'î', '%EF' => 'ï',
        '%F0' => 'ð', '%F1' => 'ñ', '%F2' => 'ò', '%F3' => 'ó', '%F4' => 'ô', '%F5' => 'õ', '%F6' => 'ö', '%F7' => '÷',
        '%F8' => 'ø', '%F9' => 'ù', '%FA' => 'ú', '%FB' => 'û', '%FC' => 'ü', '%FD' => 'ý', '%FE' => 'þ', '%FF' => 'ÿ',
    );

    return $array;
  }
6472
6473 6
  /**
   * Converts an UTF-8 string to ISO-8859-1.
   *
   * The input is cast to string and passed through "UTF8::to_utf8()"; afterwards every key of
   * "self::$utf8ToWin1252" found in the string is replaced by its mapped value and the result
   * is handed to "Xml::utf8_decode()".
   *
   * @param string $str
   *
   * @return string The decoded string, or an empty string if the input is empty.
   */
  public static function utf8_decode($str)
  {
    // search / replace lists are derived from the mapping once and kept for later calls
    static $searchList = null;
    static $replaceList = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    $str = self::to_utf8($str);

    if (null === $searchList) {
      $searchList = array_keys(self::$utf8ToWin1252);
      $replaceList = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($searchList, $replaceList, $str);

    return Xml::utf8_decode($mapped);
  }
6503
6504
  /**
   * Converts an ISO-8859-1 string to UTF-8.
   *
   * The native "utf8_encode()" does the conversion; if the result contains the byte 0xC2,
   * the keys of "self::$cp1252ToUtf8" are additionally replaced by their mapped values.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists are derived from the mapping once and kept for later calls
    static $searchList = null;
    static $replaceList = null;

    $encoded = utf8_encode($str);

    // fast path: the replacement map is only consulted when the byte 0xC2 occurs
    if (false === strpos($encoded, "\xC2")) {
      return $encoded;
    }

    if (null === $searchList) {
      $searchList = array_keys(self::$cp1252ToUtf8);
      $replaceList = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($searchList, $replaceList, $encoded);
  }
6530
6531
  /**
   * Fix UTF-8 strings that contain wrongly converted Windows-1252 characters.
   *
   * Use this if you received an UTF-8 string that was converted from Windows-1252 as if it was
   * ISO-8859-1 (i.e. the Windows-1252 characters from 80 to 9F were ignored).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to "UTF8::fix_simple_utf8()".
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string The result of "UTF8::fix_simple_utf8()".
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6548 1
6549
  /**
   * Get the table of UTF-8 whitespace characters.
   *
   * @see    http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author Derek E.
   *
   * @return array The content of "self::$whitespaceTable": all known whitespace characters as values
   *               and the type of whitespace as keys, as defined in the URL above.
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6563
6564 4
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters (regex "\S"); leading whitespace is kept.
   *
   * @param  string $str      The input; it is cast to string before any check.
   * @param  int    $words    Maximum number of words to keep.
   * @param  string $strAddOn Appended to the result when the string was shortened.
   *
   * @return string An empty string for empty input or if $words is lower than 1; the unchanged input
   *                if it does not have to be shortened (or if the pattern did not match); otherwise
   *                the first $words words, right-trimmed, followed by $strAddOn.
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    // Cast first: without it, isset($str[0]) is false for non-string scalars (e.g. integers),
    // so such input was wrongly treated as empty.
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;

    if ($words < 1) {
      return '';
    }

    // possessive quantifiers: optional leading whitespace, then up to $words "word + whitespace" groups
    preg_match('/^\s*+(?:\S++\s*+){1,' . $words . '}/u', $str, $matches);

    if (
        !isset($matches[0])
        ||
        // the match is anchored at the start, so equal length means nothing was cut off
        self::strlen($str) === self::strlen($matches[0])
    ) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
6597 4
6598 4
  /**
   * Wraps a string to a given number of characters.
   *
   * The text is translated into a single-byte mask with one byte per character, the native
   * "wordwrap()" is applied to that mask, and the break positions found there are mapped
   * back onto the original characters.
   *
   * @param string $str   The input string.
   * @param int    $width The number of characters at which the string will be wrapped.
   * @param string $break The line is broken using this string; non-string values are cast to string.
   * @param bool   $cut   Passed through to the native "wordwrap()" as its "cut" argument.
   *
   * @return false|string Returns the given string wrapped at the specified length, or false if the
   *                      native "wordwrap()" returns false for the given arguments.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    // probe call: let the native function validate $width / $break / $cut
    if (false === wordwrap('-', $width, $break, $cut)) {
      return false;
    }

    // normalise $break so the strict comparisons below always compare strings
    // (the former "is_string()" check was inverted and therefore never converted anything)
    if (!is_string($break)) {
      $break = (string)$break;
    }

    // $w is the mask of the text: '#' = existing break, ' ' = space, '?' = any other character
    $w = '';
    $strSplit = explode($break, $str);
    $count = count($strSplit);

    if (1 === $count && '' === $strSplit[0]) {
      return '';
    }

    // $chars holds the real characters (and breaks) in the same order as the mask
    $chars = array();
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $count; ++$i) {

      if ($i) {
        $chars[] = $break;
        $w .= '#';
      }

      $segment = $strSplit[$i];
      unset($strSplit[$i]);

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $w .= ' ' === $char ? ' ' : '?';
      }
    }

    $strReturn = '';
    $j = 0;
    $b = $i = -1;
    $w = wordwrap($w, $width, '#', $cut);

    // walk over every '#' in the wrapped mask; $i is the position in the mask, $j the index in $chars
    while (false !== $b = self::strpos($w, '#', $b + 1)) {
      for (++$i; $i < $b; ++$i) {
        $strReturn .= $chars[$j];
        unset($chars[$j++]);
      }

      // drop the original break / space that sits at the wrap position
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
        unset($chars[$j++]);
      }

      $strReturn .= $break;
    }

    // whatever is left belongs to the last line
    return $strReturn . implode('', $chars);
  }
6664
6665
  /**
   * Get the list of Unicode White Space characters.
   *
   * @return   array The content of "self::$whitespace": an array with numeric code point as key
   *                 and White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6674
6675
}
6676