Completed
Push — master ( bfffa5...8976d1 )
by Lars
03:17
created

UTF8::encode()   C

Complexity

Conditions 15
Paths 6

Size

Total Lines 61
Code Lines 36

Duplication

Lines 23
Ratio 37.7 %

Code Coverage

Tests 4
CRAP Score 16.7999

Importance

Changes 4
Bugs 0 Features 1
Metric Value
c 4
b 0
f 1
dl 23
loc 61
ccs 4
cts 5
cp 0.8
rs 6.2274
cc 15
eloc 36
nc 6
nop 3
crap 16.7999

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Numeric code point => UTF-8 Character
83
   *
84
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
85
   *
86
   * @var array
87
   */
88
  protected static $whitespace = array(
89
    // NUL Byte
90
    0     => "\x0",
91
    // Tab
92
    9     => "\x9",
93
    // New Line
94
    10    => "\xa",
95
    // Vertical Tab
96
    11    => "\xb",
97
    // Carriage Return
98
    13    => "\xd",
99
    // Ordinary Space
100
    32    => "\x20",
101
    // NO-BREAK SPACE
102
    160   => "\xc2\xa0",
103
    // OGHAM SPACE MARK
104
    5760  => "\xe1\x9a\x80",
105
    // MONGOLIAN VOWEL SEPARATOR
106
    6158  => "\xe1\xa0\x8e",
107
    // EN QUAD
108
    8192  => "\xe2\x80\x80",
109
    // EM QUAD
110
    8193  => "\xe2\x80\x81",
111
    // EN SPACE
112
    8194  => "\xe2\x80\x82",
113
    // EM SPACE
114
    8195  => "\xe2\x80\x83",
115
    // THREE-PER-EM SPACE
116
    8196  => "\xe2\x80\x84",
117
    // FOUR-PER-EM SPACE
118
    8197  => "\xe2\x80\x85",
119
    // SIX-PER-EM SPACE
120
    8198  => "\xe2\x80\x86",
121
    // FIGURE SPACE
122
    8199  => "\xe2\x80\x87",
123
    // PUNCTUATION SPACE
124
    8200  => "\xe2\x80\x88",
125
    // THIN SPACE
126
    8201  => "\xe2\x80\x89",
127
    //HAIR SPACE
128
    8202  => "\xe2\x80\x8a",
129
    // LINE SEPARATOR
130
    8232  => "\xe2\x80\xa8",
131
    // PARAGRAPH SEPARATOR
132
    8233  => "\xe2\x80\xa9",
133
    // NARROW NO-BREAK SPACE
134
    8239  => "\xe2\x80\xaf",
135
    // MEDIUM MATHEMATICAL SPACE
136
    8287  => "\xe2\x81\x9f",
137
    // IDEOGRAPHIC SPACE
138
    12288 => "\xe3\x80\x80",
139
  );
140
141
  /**
142
   * @var array
143
   */
144
  protected static $whitespaceTable = array(
145
      'SPACE'                     => "\x20",
146
      'NO-BREAK SPACE'            => "\xc2\xa0",
147
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
148
      'EN QUAD'                   => "\xe2\x80\x80",
149
      'EM QUAD'                   => "\xe2\x80\x81",
150
      'EN SPACE'                  => "\xe2\x80\x82",
151
      'EM SPACE'                  => "\xe2\x80\x83",
152
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
153
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
154
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
155
      'FIGURE SPACE'              => "\xe2\x80\x87",
156
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
157
      'THIN SPACE'                => "\xe2\x80\x89",
158
      'HAIR SPACE'                => "\xe2\x80\x8a",
159
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
160
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
161
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
162
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
163
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
164
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
165
  );
166
167
  /**
168
   * bidirectional text chars
169
   *
170
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
171
   *
172
   * @var array
173
   */
174
  protected static $bidiUniCodeControlsTable = array(
175
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
176
    8234 => "\xE2\x80\xAA",
177
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
178
    8235 => "\xE2\x80\xAB",
179
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
180
    8236 => "\xE2\x80\xAC",
181
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
182
    8237 => "\xE2\x80\xAD",
183
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
184
    8238 => "\xE2\x80\xAE",
185
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
186
    8294 => "\xE2\x81\xA6",
187
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
188
    8295 => "\xE2\x81\xA7",
189
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
190
    8296 => "\xE2\x81\xA8",
191
    // POP DIRECTIONAL ISOLATE
192
    8297 => "\xE2\x81\xA9",
193
  );
194
195
  /**
196
   * @var array
197
   */
198
  protected static $commonCaseFold = array(
199
      'ſ'            => 's',
200
      "\xCD\x85"     => 'ι',
201
      'ς'            => 'σ',
202
      "\xCF\x90"     => 'β',
203
      "\xCF\x91"     => 'θ',
204
      "\xCF\x95"     => 'φ',
205
      "\xCF\x96"     => 'π',
206
      "\xCF\xB0"     => 'κ',
207
      "\xCF\xB1"     => 'ρ',
208
      "\xCF\xB5"     => 'ε',
209
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
210
      "\xE1\xBE\xBE" => 'ι',
211
  );
212
213
  /**
   * Broken (mis-encoded) UTF-8 byte sequence => intended UTF-8 character.
   *
   * The first group maps the two-byte sequences C2 80 .. C2 9F to the
   * characters named in the trailing comments.
   *
   * The second group maps "double-encoded" text back to the intended
   * character: each key is the UTF-8 encoding of the value, read byte by
   * byte as Windows-1252 and encoded as UTF-8 again. The keys are written
   * as hex escapes so they cannot be silently altered by editors or
   * encoding "repair" tools (a key that is identical to its value would
   * turn the entry into a no-op).
   *
   * NOTE(review): for the lead bytes 0x81, 0x8D and 0x8F (values 'Á', 'Í',
   * 'Ï') Windows-1252 defines no character; the keys below assume the byte
   * was passed through as U+0081 / U+008D / U+008F - confirm.
   *
   * @var array
   */
  protected static $brokenUtf8ToUtf8 = array(
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
      // double-encoded sequences: UTF-8 bytes of the value, read as
      // Windows-1252 and re-encoded as UTF-8
      "\xc3\x83\xc2\xbc"     => 'ü',
      "\xc3\x83\xc2\xa4"     => 'ä',
      "\xc3\x83\xc2\xb6"     => 'ö',
      "\xc3\x83\xe2\x80\x93" => 'Ö',
      "\xc3\x83\xc5\xb8"     => 'ß',
      "\xc3\x83\xc2\xa0"     => 'à',
      "\xc3\x83\xc2\xa1"     => 'á',
      "\xc3\x83\xc2\xa2"     => 'â',
      "\xc3\x83\xc2\xa3"     => 'ã',
      "\xc3\x83\xc2\xb9"     => 'ù',
      "\xc3\x83\xc2\xba"     => 'ú',
      "\xc3\x83\xc2\xbb"     => 'û',
      "\xc3\x83\xe2\x84\xa2" => 'Ù',
      "\xc3\x83\xc5\xa1"     => 'Ú',
      "\xc3\x83\xe2\x80\xba" => 'Û',
      "\xc3\x83\xc5\x93"     => 'Ü',
      "\xc3\x83\xc2\xb2"     => 'ò',
      "\xc3\x83\xc2\xb3"     => 'ó',
      "\xc3\x83\xc2\xb4"     => 'ô',
      "\xc3\x83\xc2\xa8"     => 'è',
      "\xc3\x83\xc2\xa9"     => 'é',
      "\xc3\x83\xc2\xaa"     => 'ê',
      "\xc3\x83\xc2\xab"     => 'ë',
      "\xc3\x83\xe2\x82\xac" => 'À',
      "\xc3\x83\xc2\x81"     => 'Á',
      "\xc3\x83\xe2\x80\x9a" => 'Â',
      "\xc3\x83\xc6\x92"     => 'Ã',
      "\xc3\x83\xe2\x80\x9e" => 'Ä',
      "\xc3\x83\xe2\x80\xa6" => 'Å',
      "\xc3\x83\xe2\x80\xa1" => 'Ç',
      "\xc3\x83\xcb\x86"     => 'È',
      "\xc3\x83\xe2\x80\xb0" => 'É',
      "\xc3\x83\xc5\xa0"     => 'Ê',
      "\xc3\x83\xe2\x80\xb9" => 'Ë',
      "\xc3\x83\xc5\x92"     => 'Ì',
      "\xc3\x83\xc2\x8d"     => 'Í',
      "\xc3\x83\xc5\xbd"     => 'Î',
      "\xc3\x83\xc2\x8f"     => 'Ï',
      "\xc3\x83\xe2\x80\x98" => 'Ñ',
      "\xc3\x83\xe2\x80\x99" => 'Ò',
      "\xc3\x83\xe2\x80\x9c" => 'Ó',
      "\xc3\x83\xe2\x80\x9d" => 'Ô',
      "\xc3\x83\xe2\x80\xa2" => 'Õ',
      "\xc3\x83\xcb\x9c"     => 'Ø',
      "\xc3\x83\xc2\xa5"     => 'å',
      "\xc3\x83\xc2\xa6"     => 'æ',
      "\xc3\x83\xc2\xa7"     => 'ç',
      "\xc3\x83\xc2\xac"     => 'ì',
      "\xc3\x83\xc2\xad"     => 'í',
      "\xc3\x83\xc2\xae"     => 'î',
      "\xc3\x83\xc2\xaf"     => 'ï',
      "\xc3\x83\xc2\xb0"     => 'ð',
      "\xc3\x83\xc2\xb1"     => 'ñ',
      "\xc3\x83\xc2\xb5"     => 'õ',
      "\xc3\x83\xc2\xb8"     => 'ø',
      "\xc3\x83\xc2\xbd"     => 'ý',
      "\xc3\x83\xc2\xbf"     => 'ÿ',
      "\xc3\xa2\xe2\x80\x9a\xc2\xac" => '€',
  );
303
304
  /**
305
   * @var array
306
   */
307
  protected static $utf8ToWin1252 = array(
308
      "\xe2\x82\xac" => "\x80", // EURO SIGN
309
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
310
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
311
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
312
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
313
      "\xe2\x80\xa0" => "\x86", // DAGGER
314
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
315
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
316
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
317
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
318
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
319
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
320
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
321
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
322
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
323
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
324
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
325
      "\xe2\x80\xa2" => "\x95", // BULLET
326
      "\xe2\x80\x93" => "\x96", // EN DASH
327
      "\xe2\x80\x94" => "\x97", // EM DASH
328
      "\xcb\x9c"     => "\x98", // SMALL TILDE
329
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
330
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
331
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
332
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
333
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
334
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
335
  );
336
337
  /**
338
   * @var array
339
   */
340
  protected static $utf8MSWord = array(
341
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
342
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
343
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
344
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
345
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
346
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
347
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
348
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
349
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
350
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
351
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
352
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
353
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
354
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
355
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
356
  );
357
358
  protected static $iconvEncoding = array(
359
      'ANSI_X3.4-1968',
360
      'ANSI_X3.4-1986',
361
      'ASCII',
362
      'CP367',
363
      'IBM367',
364
      'ISO-IR-6',
365
      'ISO646-US',
366
      'ISO_646.IRV:1991',
367
      'US',
368
      'US-ASCII',
369
      'CSASCII',
370
      'UTF-8',
371
      'ISO-10646-UCS-2',
372
      'UCS-2',
373
      'CSUNICODE',
374
      'UCS-2BE',
375
      'UNICODE-1-1',
376
      'UNICODEBIG',
377
      'CSUNICODE11',
378
      'UCS-2LE',
379
      'UNICODELITTLE',
380
      'ISO-10646-UCS-4',
381
      'UCS-4',
382
      'CSUCS4',
383
      'UCS-4BE',
384
      'UCS-4LE',
385
      'UTF-16',
386
      'UTF-16BE',
387
      'UTF-16LE',
388
      'UTF-32',
389
      'UTF-32BE',
390
      'UTF-32LE',
391
      'UNICODE-1-1-UTF-7',
392
      'UTF-7',
393
      'CSUNICODE11UTF7',
394
      'UCS-2-INTERNAL',
395
      'UCS-2-SWAPPED',
396
      'UCS-4-INTERNAL',
397
      'UCS-4-SWAPPED',
398
      'C99',
399
      'JAVA',
400
      'CP819',
401
      'IBM819',
402
      'ISO-8859-1',
403
      'ISO-IR-100',
404
      'ISO8859-1',
405
      'ISO_8859-1',
406
      'ISO_8859-1:1987',
407
      'L1',
408
      'LATIN1',
409
      'CSISOLATIN1',
410
      'ISO-8859-2',
411
      'ISO-IR-101',
412
      'ISO8859-2',
413
      'ISO_8859-2',
414
      'ISO_8859-2:1987',
415
      'L2',
416
      'LATIN2',
417
      'CSISOLATIN2',
418
      'ISO-8859-3',
419
      'ISO-IR-109',
420
      'ISO8859-3',
421
      'ISO_8859-3',
422
      'ISO_8859-3:1988',
423
      'L3',
424
      'LATIN3',
425
      'CSISOLATIN3',
426
      'ISO-8859-4',
427
      'ISO-IR-110',
428
      'ISO8859-4',
429
      'ISO_8859-4',
430
      'ISO_8859-4:1988',
431
      'L4',
432
      'LATIN4',
433
      'CSISOLATIN4',
434
      'CYRILLIC',
435
      'ISO-8859-5',
436
      'ISO-IR-144',
437
      'ISO8859-5',
438
      'ISO_8859-5',
439
      'ISO_8859-5:1988',
440
      'CSISOLATINCYRILLIC',
441
      'ARABIC',
442
      'ASMO-708',
443
      'ECMA-114',
444
      'ISO-8859-6',
445
      'ISO-IR-127',
446
      'ISO8859-6',
447
      'ISO_8859-6',
448
      'ISO_8859-6:1987',
449
      'CSISOLATINARABIC',
450
      'ECMA-118',
451
      'ELOT_928',
452
      'GREEK',
453
      'GREEK8',
454
      'ISO-8859-7',
455
      'ISO-IR-126',
456
      'ISO8859-7',
457
      'ISO_8859-7',
458
      'ISO_8859-7:1987',
459
      'ISO_8859-7:2003',
460
      'CSISOLATINGREEK',
461
      'HEBREW',
462
      'ISO-8859-8',
463
      'ISO-IR-138',
464
      'ISO8859-8',
465
      'ISO_8859-8',
466
      'ISO_8859-8:1988',
467
      'CSISOLATINHEBREW',
468
      'ISO-8859-9',
469
      'ISO-IR-148',
470
      'ISO8859-9',
471
      'ISO_8859-9',
472
      'ISO_8859-9:1989',
473
      'L5',
474
      'LATIN5',
475
      'CSISOLATIN5',
476
      'ISO-8859-10',
477
      'ISO-IR-157',
478
      'ISO8859-10',
479
      'ISO_8859-10',
480
      'ISO_8859-10:1992',
481
      'L6',
482
      'LATIN6',
483
      'CSISOLATIN6',
484
      'ISO-8859-11',
485
      'ISO8859-11',
486
      'ISO_8859-11',
487
      'ISO-8859-13',
488
      'ISO-IR-179',
489
      'ISO8859-13',
490
      'ISO_8859-13',
491
      'L7',
492
      'LATIN7',
493
      'ISO-8859-14',
494
      'ISO-CELTIC',
495
      'ISO-IR-199',
496
      'ISO8859-14',
497
      'ISO_8859-14',
498
      'ISO_8859-14:1998',
499
      'L8',
500
      'LATIN8',
501
      'ISO-8859-15',
502
      'ISO-IR-203',
503
      'ISO8859-15',
504
      'ISO_8859-15',
505
      'ISO_8859-15:1998',
506
      'LATIN-9',
507
      'ISO-8859-16',
508
      'ISO-IR-226',
509
      'ISO8859-16',
510
      'ISO_8859-16',
511
      'ISO_8859-16:2001',
512
      'L10',
513
      'LATIN10',
514
      'KOI8-R',
515
      'CSKOI8R',
516
      'KOI8-U',
517
      'KOI8-RU',
518
      'CP1250',
519
      'MS-EE',
520
      'WINDOWS-1250',
521
      'CP1251',
522
      'MS-CYRL',
523
      'WINDOWS-1251',
524
      'CP1252',
525
      'MS-ANSI',
526
      'WINDOWS-1252',
527
      'CP1253',
528
      'MS-GREEK',
529
      'WINDOWS-1253',
530
      'CP1254',
531
      'MS-TURK',
532
      'WINDOWS-1254',
533
      'CP1255',
534
      'MS-HEBR',
535
      'WINDOWS-1255',
536
      'CP1256',
537
      'MS-ARAB',
538
      'WINDOWS-1256',
539
      'CP1257',
540
      'WINBALTRIM',
541
      'WINDOWS-1257',
542
      'CP1258',
543
      'WINDOWS-1258',
544
      '850',
545
      'CP850',
546
      'IBM850',
547
      'CSPC850MULTILINGUAL',
548
      '862',
549
      'CP862',
550
      'IBM862',
551
      'CSPC862LATINHEBREW',
552
      '866',
553
      'CP866',
554
      'IBM866',
555
      'CSIBM866',
556
      'MAC',
557
      'MACINTOSH',
558
      'MACROMAN',
559
      'CSMACINTOSH',
560
      'MACCENTRALEUROPE',
561
      'MACICELAND',
562
      'MACCROATIAN',
563
      'MACROMANIA',
564
      'MACCYRILLIC',
565
      'MACUKRAINE',
566
      'MACGREEK',
567
      'MACTURKISH',
568
      'MACHEBREW',
569
      'MACARABIC',
570
      'MACTHAI',
571
      'HP-ROMAN8',
572
      'R8',
573
      'ROMAN8',
574
      'CSHPROMAN8',
575
      'NEXTSTEP',
576
      'ARMSCII-8',
577
      'GEORGIAN-ACADEMY',
578
      'GEORGIAN-PS',
579
      'KOI8-T',
580
      'CP154',
581
      'CYRILLIC-ASIAN',
582
      'PT154',
583
      'PTCP154',
584
      'CSPTCP154',
585
      'KZ-1048',
586
      'RK1048',
587
      'STRK1048-2002',
588
      'CSKZ1048',
589
      'MULELAO-1',
590
      'CP1133',
591
      'IBM-CP1133',
592
      'ISO-IR-166',
593
      'TIS-620',
594
      'TIS620',
595
      'TIS620-0',
596
      'TIS620.2529-1',
597
      'TIS620.2533-0',
598
      'TIS620.2533-1',
599
      'CP874',
600
      'WINDOWS-874',
601
      'VISCII',
602
      'VISCII1.1-1',
603
      'CSVISCII',
604
      'TCVN',
605
      'TCVN-5712',
606
      'TCVN5712-1',
607
      'TCVN5712-1:1993',
608
      'ISO-IR-14',
609
      'ISO646-JP',
610
      'JIS_C6220-1969-RO',
611
      'JP',
612
      'CSISO14JISC6220RO',
613
      'JISX0201-1976',
614
      'JIS_X0201',
615
      'X0201',
616
      'CSHALFWIDTHKATAKANA',
617
      'ISO-IR-87',
618
      'JIS0208',
619
      'JIS_C6226-1983',
620
      'JIS_X0208',
621
      'JIS_X0208-1983',
622
      'JIS_X0208-1990',
623
      'X0208',
624
      'CSISO87JISX0208',
625
      'ISO-IR-159',
626
      'JIS_X0212',
627
      'JIS_X0212-1990',
628
      'JIS_X0212.1990-0',
629
      'X0212',
630
      'CSISO159JISX02121990',
631
      'CN',
632
      'GB_1988-80',
633
      'ISO-IR-57',
634
      'ISO646-CN',
635
      'CSISO57GB1988',
636
      'CHINESE',
637
      'GB_2312-80',
638
      'ISO-IR-58',
639
      'CSISO58GB231280',
640
      'CN-GB-ISOIR165',
641
      'ISO-IR-165',
642
      'ISO-IR-149',
643
      'KOREAN',
644
      'KSC_5601',
645
      'KS_C_5601-1987',
646
      'KS_C_5601-1989',
647
      'CSKSC56011987',
648
      'EUC-JP',
649
      'EUCJP',
650
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
651
      'CSEUCPKDFMTJAPANESE',
652
      'MS_KANJI',
653
      'SHIFT-JIS',
654
      'SHIFT_JIS',
655
      'SJIS',
656
      'CSSHIFTJIS',
657
      'CP932',
658
      'ISO-2022-JP',
659
      'CSISO2022JP',
660
      'ISO-2022-JP-1',
661
      'ISO-2022-JP-2',
662
      'CSISO2022JP2',
663
      'CN-GB',
664
      'EUC-CN',
665
      'EUCCN',
666
      'GB2312',
667
      'CSGB2312',
668
      'GBK',
669
      'CP936',
670
      'MS936',
671
      'WINDOWS-936',
672
      'GB18030',
673
      'ISO-2022-CN',
674
      'CSISO2022CN',
675
      'ISO-2022-CN-EXT',
676
      'HZ',
677
      'HZ-GB-2312',
678
      'EUC-TW',
679
      'EUCTW',
680
      'CSEUCTW',
681
      'BIG-5',
682
      'BIG-FIVE',
683
      'BIG5',
684
      'BIGFIVE',
685
      'CN-BIG5',
686
      'CSBIG5',
687
      'CP950',
688
      'BIG5-HKSCS:1999',
689
      'BIG5-HKSCS:2001',
690
      'BIG5-HKSCS',
691
      'BIG5-HKSCS:2004',
692
      'BIG5HKSCS',
693
      'EUC-KR',
694
      'EUCKR',
695
      'CSEUCKR',
696
      'CP949',
697
      'UHC',
698
      'CP1361',
699
      'JOHAB',
700
      'ISO-2022-KR',
701
      'CSISO2022KR',
702
      'CP856',
703
      'CP922',
704
      'CP943',
705
      'CP1046',
706
      'CP1124',
707
      'CP1129',
708
      'CP1161',
709
      'IBM-1161',
710
      'IBM1161',
711
      'CSIBM1161',
712
      'CP1162',
713
      'IBM-1162',
714
      'IBM1162',
715
      'CSIBM1162',
716
      'CP1163',
717
      'IBM-1163',
718
      'IBM1163',
719
      'CSIBM1163',
720
      'DEC-KANJI',
721
      'DEC-HANYU',
722
      '437',
723
      'CP437',
724
      'IBM437',
725
      'CSPC8CODEPAGE437',
726
      'CP737',
727
      'CP775',
728
      'IBM775',
729
      'CSPC775BALTIC',
730
      '852',
731
      'CP852',
732
      'IBM852',
733
      'CSPCP852',
734
      'CP853',
735
      '855',
736
      'CP855',
737
      'IBM855',
738
      'CSIBM855',
739
      '857',
740
      'CP857',
741
      'IBM857',
742
      'CSIBM857',
743
      'CP858',
744
      '860',
745
      'CP860',
746
      'IBM860',
747
      'CSIBM860',
748
      '861',
749
      'CP-IS',
750
      'CP861',
751
      'IBM861',
752
      'CSIBM861',
753
      '863',
754
      'CP863',
755
      'IBM863',
756
      'CSIBM863',
757
      'CP864',
758
      'IBM864',
759
      'CSIBM864',
760
      '865',
761
      'CP865',
762
      'IBM865',
763
      'CSIBM865',
764
      '869',
765
      'CP-GR',
766
      'CP869',
767
      'IBM869',
768
      'CSIBM869',
769
      'CP1125',
770
      'EUC-JISX0213',
771
      'SHIFT_JISX0213',
772
      'ISO-2022-JP-3',
773
      'BIG5-2003',
774
      'ISO-IR-230',
775
      'TDS565',
776
      'ATARI',
777
      'ATARIST',
778
      'RISCOS-LATIN1',
779
  );
780
781
  /**
782
   * @var array
783
   */
784
  private static $support = array();
785
786
  /**
   * Constructor.
   *
   * Calls self::checkForSupport() every time an instance is created.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
793 1
794
  /**
   * Returns a single UTF-8 character from string.
   *
   * Offers "$str[1]"-like access by delegating to self::substr() with a
   * length of 1.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of character to return.
   *
   * @return   string Single Multi-Byte character.
   */
  public static function access($str, $pos)
  {
    return self::substr($str, $pos, 1);
  }
808
809
  /**
   * Prepends BOM character to the string and returns the whole string.
   *
   * INFO: If the first three bytes of the input already pass self::is_bom(),
   *       the input string is returned unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that contains BOM
   */
  public static function add_bom_to_string($str)
  {
    // self::bom() is three bytes long, so only the first three bytes of the
    // input are handed to self::is_bom().
    if (!self::is_bom(substr($str, 0, 3))) {
      $str = self::bom() . $str;
    }

    return $str;
  }
826
827
  /**
   * Returns the Byte Order Mark Character.
   *
   * @return   string Byte Order Mark (the three bytes EF BB BF)
   */
  public static function bom()
  {
    return "\xEF\xBB\xBF";
  }
836
837
  /**
   * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
   *
   * @alias of UTF8::chr_map()
   *
   * @param mixed $callback First argument for self::chr_map()
   *                        (presumably a callable - confirm against chr_map()).
   * @param mixed $str      Second argument for self::chr_map()
   *                        (presumably the input string - confirm against chr_map()).
   *
   * @return array The result of self::chr_map().
   */
  public static function callback($callback, $str)
  {
    return self::chr_map($callback, $str);
  }
849
850
  /**
851
   * Returns an array of all lower and upper case UTF-8 encoded characters.
852
   *
853
   * @return   array An array with lower case chars as keys and upper chars as values.
854
   */
855
  protected static function case_table()
856
  {
857
    static $case = array(
858
859
      // lower => upper
860
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
861
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
862
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
863
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
864
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
865
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
866
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
867
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
868
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
869
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
870
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
871
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
872
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
873
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
874
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
875
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
876
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
877
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
878
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
879
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
880
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
881
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
882
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
883
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
884
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
885
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
886
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
887
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
888
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
889
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
890
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
891
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
892
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
893
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
894
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
895
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
896
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
897
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
898
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
899
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
900
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
901
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
902
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
903
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
904
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
905
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
906
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
907
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
908
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
909
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
910
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
911
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
912
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
913
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
914
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
915
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
916
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
917
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
918
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
919
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
920
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
921
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
922
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
923
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
924
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
925
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
926
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
927
      "\xea\x9e\x87"     => "\xea\x9e\x86",
928
      "\xea\x9e\x85"     => "\xea\x9e\x84",
929
      "\xea\x9e\x83"     => "\xea\x9e\x82",
930
      "\xea\x9e\x81"     => "\xea\x9e\x80",
931
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
932
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
933
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
934
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
935
      "\xea\x9d\xad"     => "\xea\x9d\xac",
936
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
937
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
938
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
939
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
940
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
941
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
942
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
943
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
944
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
945
      "\xea\x9d\x99"     => "\xea\x9d\x98",
946
      "\xea\x9d\x97"     => "\xea\x9d\x96",
947
      "\xea\x9d\x95"     => "\xea\x9d\x94",
948
      "\xea\x9d\x93"     => "\xea\x9d\x92",
949
      "\xea\x9d\x91"     => "\xea\x9d\x90",
950
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
951
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
952
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
953
      "\xea\x9d\x89"     => "\xea\x9d\x88",
954
      "\xea\x9d\x87"     => "\xea\x9d\x86",
955
      "\xea\x9d\x85"     => "\xea\x9d\x84",
956
      "\xea\x9d\x83"     => "\xea\x9d\x82",
957
      "\xea\x9d\x81"     => "\xea\x9d\x80",
958
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
959
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
960
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
961
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
962
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
963
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
964
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
965
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
966
      "\xea\x9c\xad"     => "\xea\x9c\xac",
967
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
968
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
969
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
970
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
971
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
972
      "\xea\x9a\x97"     => "\xea\x9a\x96",
973
      "\xea\x9a\x95"     => "\xea\x9a\x94",
974
      "\xea\x9a\x93"     => "\xea\x9a\x92",
975
      "\xea\x9a\x91"     => "\xea\x9a\x90",
976
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
977
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
978
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
979
      "\xea\x9a\x89"     => "\xea\x9a\x88",
980
      "\xea\x9a\x87"     => "\xea\x9a\x86",
981
      "\xea\x9a\x85"     => "\xea\x9a\x84",
982
      "\xea\x9a\x83"     => "\xea\x9a\x82",
983
      "\xea\x9a\x81"     => "\xea\x9a\x80",
984
      "\xea\x99\xad"     => "\xea\x99\xac",
985
      "\xea\x99\xab"     => "\xea\x99\xaa",
986
      "\xea\x99\xa9"     => "\xea\x99\xa8",
987
      "\xea\x99\xa7"     => "\xea\x99\xa6",
988
      "\xea\x99\xa5"     => "\xea\x99\xa4",
989
      "\xea\x99\xa3"     => "\xea\x99\xa2",
990
      "\xea\x99\x9f"     => "\xea\x99\x9e",
991
      "\xea\x99\x9d"     => "\xea\x99\x9c",
992
      "\xea\x99\x9b"     => "\xea\x99\x9a",
993
      "\xea\x99\x99"     => "\xea\x99\x98",
994
      "\xea\x99\x97"     => "\xea\x99\x96",
995
      "\xea\x99\x95"     => "\xea\x99\x94",
996
      "\xea\x99\x93"     => "\xea\x99\x92",
997
      "\xea\x99\x91"     => "\xea\x99\x90",
998
      "\xea\x99\x8f"     => "\xea\x99\x8e",
999
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1000
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1001
      "\xea\x99\x89"     => "\xea\x99\x88",
1002
      "\xea\x99\x87"     => "\xea\x99\x86",
1003
      "\xea\x99\x85"     => "\xea\x99\x84",
1004
      "\xea\x99\x83"     => "\xea\x99\x82",
1005
      "\xea\x99\x81"     => "\xea\x99\x80",
1006
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1007
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1008
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1009
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1010
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1011
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1012
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1013
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1014
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1015
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1016
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1017
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1018
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1019
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1020
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1021
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1022
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1023
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1024
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1025
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1026
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1027
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1028
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1029
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1030
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1031
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1032
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1033
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1034
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1035
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1036
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1037
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1038
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1039
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1040
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1041
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1042
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1043
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1044
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1045
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1046
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1047
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1048
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1049
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1050
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1051
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1052
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1053
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1054
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1055
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1056
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1057
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1058
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1059
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1060
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1061
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1062
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1063
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1064
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1065
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1066
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1067
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1068
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1069
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1070
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1071
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1072
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1073
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1074
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1075
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1076
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1077
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1078
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1079
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1080
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1081
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1082
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1083
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1084
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1085
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1086
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1087
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1088
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1089
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1090
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1091
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1092
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1093
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1094
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1095
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1096
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1097
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1098
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1099
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1100
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1101
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1102
      "\xe2\xb1\xa5"     => "\xc8\xba",
1103
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1104
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1105
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1106
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1107
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1108
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1109
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1110
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1111
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1112
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1113
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1114
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1115
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1116
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1117
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1118
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1119
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1120
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1121
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1122
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1123
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1124
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1125
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1126
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1127
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1128
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1129
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1130
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1131
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1132
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1133
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1134
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1135
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1136
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1137
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1138
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1139
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1140
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1141
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1142
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1143
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1144
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1145
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1146
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1147
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1148
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1149
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1150
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1151
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1152
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1153
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1154
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1155
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1156
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1157
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1158
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1159
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1160
      "\xe1\xbe\xbe"     => "\xce\x99",
1161
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1162
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1163
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1164
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1165
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1166
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1167
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1168
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1169
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1170
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1171
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1172
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1173
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1174
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1175
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1176
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1177
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1178
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1179
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1180
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1181
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1182
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1183
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1184
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1185
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1186
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1187
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1188
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1189
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1190
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1191
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1192
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1193
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1194
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1195
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1196
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1197
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1198
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1199
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1200
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1201
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1202
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1203
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1204
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1205
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1206
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1207
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1208
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1209
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1210
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1211
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1212
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1213
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1214
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1215
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1216
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1217
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1218
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1219
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1220
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1221
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1222
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1223
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1224
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1225
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1226
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1227
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1228
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1229
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1230
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1231
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1232
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1233
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1234
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1235
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1236
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1237
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1238
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1239
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1240
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1241
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1242
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1243
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1244
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1245
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1246
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1247
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1248
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1249
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1250
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1251
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1252
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1253
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1254
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1255
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1256
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1257
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1258
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1259
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1260
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1261
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1262
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1263
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1264
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1265
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1266
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1267
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1268
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1269
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1270
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1271
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1272
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1273
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1274
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1275
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1276
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1277
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1278
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1279
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1280
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1281
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1282
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1283
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1284
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1285
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1286
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1287
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1288
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1289
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1290
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1291
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1292
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1293
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1294
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1295
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1296
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1297
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1298
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1299
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1300
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1301
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1302
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1303
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1304
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1305
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1306
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1307
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1308
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1309
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1310
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1311
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1312
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1313
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1314
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1315
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1316
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1317
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1318
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1319
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1320
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1321
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1322
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1323
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1324
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1325
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1326
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1327
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1328
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1329
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1330
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1331
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1332
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1333
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1334
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1335
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1336
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1337
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1338
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1339
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1340
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1341
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1342
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1343
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1344
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1345
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1346
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1347
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1348
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1349
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1350
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1351
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1352
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1353
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1354
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1355
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1356
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1357
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1358
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1359
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1360
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1361
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1362
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1363
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1364
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1365
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1366
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1367
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1368
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1369
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1370
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1371
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1372
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1373
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1374
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1375
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1376
      "\xd6\x86"         => "\xd5\x96",
1377
      "\xd6\x85"         => "\xd5\x95",
1378
      "\xd6\x84"         => "\xd5\x94",
1379
      "\xd6\x83"         => "\xd5\x93",
1380
      "\xd6\x82"         => "\xd5\x92",
1381
      "\xd6\x81"         => "\xd5\x91",
1382
      "\xd6\x80"         => "\xd5\x90",
1383
      "\xd5\xbf"         => "\xd5\x8f",
1384
      "\xd5\xbe"         => "\xd5\x8e",
1385
      "\xd5\xbd"         => "\xd5\x8d",
1386
      "\xd5\xbc"         => "\xd5\x8c",
1387
      "\xd5\xbb"         => "\xd5\x8b",
1388
      "\xd5\xba"         => "\xd5\x8a",
1389
      "\xd5\xb9"         => "\xd5\x89",
1390
      "\xd5\xb8"         => "\xd5\x88",
1391
      "\xd5\xb7"         => "\xd5\x87",
1392
      "\xd5\xb6"         => "\xd5\x86",
1393
      "\xd5\xb5"         => "\xd5\x85",
1394
      "\xd5\xb4"         => "\xd5\x84",
1395
      "\xd5\xb3"         => "\xd5\x83",
1396
      "\xd5\xb2"         => "\xd5\x82",
1397
      "\xd5\xb1"         => "\xd5\x81",
1398
      "\xd5\xb0"         => "\xd5\x80",
1399
      "\xd5\xaf"         => "\xd4\xbf",
1400
      "\xd5\xae"         => "\xd4\xbe",
1401
      "\xd5\xad"         => "\xd4\xbd",
1402
      "\xd5\xac"         => "\xd4\xbc",
1403
      "\xd5\xab"         => "\xd4\xbb",
1404
      "\xd5\xaa"         => "\xd4\xba",
1405
      "\xd5\xa9"         => "\xd4\xb9",
1406
      "\xd5\xa8"         => "\xd4\xb8",
1407
      "\xd5\xa7"         => "\xd4\xb7",
1408
      "\xd5\xa6"         => "\xd4\xb6",
1409
      "\xd5\xa5"         => "\xd4\xb5",
1410
      "\xd5\xa4"         => "\xd4\xb4",
1411
      "\xd5\xa3"         => "\xd4\xb3",
1412
      "\xd5\xa2"         => "\xd4\xb2",
1413
      "\xd5\xa1"         => "\xd4\xb1",
1414
      "\xd4\xa5"         => "\xd4\xa4",
1415
      "\xd4\xa3"         => "\xd4\xa2",
1416
      "\xd4\xa1"         => "\xd4\xa0",
1417
      "\xd4\x9f"         => "\xd4\x9e",
1418
      "\xd4\x9d"         => "\xd4\x9c",
1419
      "\xd4\x9b"         => "\xd4\x9a",
1420
      "\xd4\x99"         => "\xd4\x98",
1421
      "\xd4\x97"         => "\xd4\x96",
1422
      "\xd4\x95"         => "\xd4\x94",
1423
      "\xd4\x93"         => "\xd4\x92",
1424
      "\xd4\x91"         => "\xd4\x90",
1425
      "\xd4\x8f"         => "\xd4\x8e",
1426
      "\xd4\x8d"         => "\xd4\x8c",
1427
      "\xd4\x8b"         => "\xd4\x8a",
1428
      "\xd4\x89"         => "\xd4\x88",
1429
      "\xd4\x87"         => "\xd4\x86",
1430
      "\xd4\x85"         => "\xd4\x84",
1431
      "\xd4\x83"         => "\xd4\x82",
1432
      "\xd4\x81"         => "\xd4\x80",
1433
      "\xd3\xbf"         => "\xd3\xbe",
1434
      "\xd3\xbd"         => "\xd3\xbc",
1435
      "\xd3\xbb"         => "\xd3\xba",
1436
      "\xd3\xb9"         => "\xd3\xb8",
1437
      "\xd3\xb7"         => "\xd3\xb6",
1438
      "\xd3\xb5"         => "\xd3\xb4",
1439
      "\xd3\xb3"         => "\xd3\xb2",
1440
      "\xd3\xb1"         => "\xd3\xb0",
1441
      "\xd3\xaf"         => "\xd3\xae",
1442
      "\xd3\xad"         => "\xd3\xac",
1443
      "\xd3\xab"         => "\xd3\xaa",
1444
      "\xd3\xa9"         => "\xd3\xa8",
1445
      "\xd3\xa7"         => "\xd3\xa6",
1446
      "\xd3\xa5"         => "\xd3\xa4",
1447
      "\xd3\xa3"         => "\xd3\xa2",
1448
      "\xd3\xa1"         => "\xd3\xa0",
1449
      "\xd3\x9f"         => "\xd3\x9e",
1450
      "\xd3\x9d"         => "\xd3\x9c",
1451
      "\xd3\x9b"         => "\xd3\x9a",
1452
      "\xd3\x99"         => "\xd3\x98",
1453
      "\xd3\x97"         => "\xd3\x96",
1454
      "\xd3\x95"         => "\xd3\x94",
1455
      "\xd3\x93"         => "\xd3\x92",
1456
      "\xd3\x91"         => "\xd3\x90",
1457
      "\xd3\x8f"         => "\xd3\x80",
1458
      "\xd3\x8e"         => "\xd3\x8d",
1459
      "\xd3\x8c"         => "\xd3\x8b",
1460
      "\xd3\x8a"         => "\xd3\x89",
1461
      "\xd3\x88"         => "\xd3\x87",
1462
      "\xd3\x86"         => "\xd3\x85",
1463
      "\xd3\x84"         => "\xd3\x83",
1464
      "\xd3\x82"         => "\xd3\x81",
1465
      "\xd2\xbf"         => "\xd2\xbe",
1466
      "\xd2\xbd"         => "\xd2\xbc",
1467
      "\xd2\xbb"         => "\xd2\xba",
1468
      "\xd2\xb9"         => "\xd2\xb8",
1469
      "\xd2\xb7"         => "\xd2\xb6",
1470
      "\xd2\xb5"         => "\xd2\xb4",
1471
      "\xd2\xb3"         => "\xd2\xb2",
1472
      "\xd2\xb1"         => "\xd2\xb0",
1473
      "\xd2\xaf"         => "\xd2\xae",
1474
      "\xd2\xad"         => "\xd2\xac",
1475
      "\xd2\xab"         => "\xd2\xaa",
1476
      "\xd2\xa9"         => "\xd2\xa8",
1477
      "\xd2\xa7"         => "\xd2\xa6",
1478
      "\xd2\xa5"         => "\xd2\xa4",
1479
      "\xd2\xa3"         => "\xd2\xa2",
1480
      "\xd2\xa1"         => "\xd2\xa0",
1481
      "\xd2\x9f"         => "\xd2\x9e",
1482
      "\xd2\x9d"         => "\xd2\x9c",
1483
      "\xd2\x9b"         => "\xd2\x9a",
1484
      "\xd2\x99"         => "\xd2\x98",
1485
      "\xd2\x97"         => "\xd2\x96",
1486
      "\xd2\x95"         => "\xd2\x94",
1487
      "\xd2\x93"         => "\xd2\x92",
1488
      "\xd2\x91"         => "\xd2\x90",
1489
      "\xd2\x8f"         => "\xd2\x8e",
1490
      "\xd2\x8d"         => "\xd2\x8c",
1491
      "\xd2\x8b"         => "\xd2\x8a",
1492
      "\xd2\x81"         => "\xd2\x80",
1493
      "\xd1\xbf"         => "\xd1\xbe",
1494
      "\xd1\xbd"         => "\xd1\xbc",
1495
      "\xd1\xbb"         => "\xd1\xba",
1496
      "\xd1\xb9"         => "\xd1\xb8",
1497
      "\xd1\xb7"         => "\xd1\xb6",
1498
      "\xd1\xb5"         => "\xd1\xb4",
1499
      "\xd1\xb3"         => "\xd1\xb2",
1500
      "\xd1\xb1"         => "\xd1\xb0",
1501
      "\xd1\xaf"         => "\xd1\xae",
1502
      "\xd1\xad"         => "\xd1\xac",
1503
      "\xd1\xab"         => "\xd1\xaa",
1504
      "\xd1\xa9"         => "\xd1\xa8",
1505
      "\xd1\xa7"         => "\xd1\xa6",
1506
      "\xd1\xa5"         => "\xd1\xa4",
1507
      "\xd1\xa3"         => "\xd1\xa2",
1508
      "\xd1\xa1"         => "\xd1\xa0",
1509
      "\xd1\x9f"         => "\xd0\x8f",
1510
      "\xd1\x9e"         => "\xd0\x8e",
1511
      "\xd1\x9d"         => "\xd0\x8d",
1512
      "\xd1\x9c"         => "\xd0\x8c",
1513
      "\xd1\x9b"         => "\xd0\x8b",
1514
      "\xd1\x9a"         => "\xd0\x8a",
1515
      "\xd1\x99"         => "\xd0\x89",
1516
      "\xd1\x98"         => "\xd0\x88",
1517
      "\xd1\x97"         => "\xd0\x87",
1518
      "\xd1\x96"         => "\xd0\x86",
1519
      "\xd1\x95"         => "\xd0\x85",
1520
      "\xd1\x94"         => "\xd0\x84",
1521
      "\xd1\x93"         => "\xd0\x83",
1522
      "\xd1\x92"         => "\xd0\x82",
1523
      "\xd1\x91"         => "\xd0\x81",
1524
      "\xd1\x90"         => "\xd0\x80",
1525
      "\xd1\x8f"         => "\xd0\xaf",
1526
      "\xd1\x8e"         => "\xd0\xae",
1527
      "\xd1\x8d"         => "\xd0\xad",
1528
      "\xd1\x8c"         => "\xd0\xac",
1529
      "\xd1\x8b"         => "\xd0\xab",
1530
      "\xd1\x8a"         => "\xd0\xaa",
1531
      "\xd1\x89"         => "\xd0\xa9",
1532
      "\xd1\x88"         => "\xd0\xa8",
1533
      "\xd1\x87"         => "\xd0\xa7",
1534
      "\xd1\x86"         => "\xd0\xa6",
1535
      "\xd1\x85"         => "\xd0\xa5",
1536
      "\xd1\x84"         => "\xd0\xa4",
1537
      "\xd1\x83"         => "\xd0\xa3",
1538
      "\xd1\x82"         => "\xd0\xa2",
1539
      "\xd1\x81"         => "\xd0\xa1",
1540
      "\xd1\x80"         => "\xd0\xa0",
1541
      "\xd0\xbf"         => "\xd0\x9f",
1542
      "\xd0\xbe"         => "\xd0\x9e",
1543
      "\xd0\xbd"         => "\xd0\x9d",
1544
      "\xd0\xbc"         => "\xd0\x9c",
1545
      "\xd0\xbb"         => "\xd0\x9b",
1546
      "\xd0\xba"         => "\xd0\x9a",
1547
      "\xd0\xb9"         => "\xd0\x99",
1548
      "\xd0\xb8"         => "\xd0\x98",
1549
      "\xd0\xb7"         => "\xd0\x97",
1550
      "\xd0\xb6"         => "\xd0\x96",
1551
      "\xd0\xb5"         => "\xd0\x95",
1552
      "\xd0\xb4"         => "\xd0\x94",
1553
      "\xd0\xb3"         => "\xd0\x93",
1554
      "\xd0\xb2"         => "\xd0\x92",
1555
      "\xd0\xb1"         => "\xd0\x91",
1556
      "\xd0\xb0"         => "\xd0\x90",
1557
      "\xcf\xbb"         => "\xcf\xba",
1558
      "\xcf\xb8"         => "\xcf\xb7",
1559
      "\xcf\xb5"         => "\xce\x95",
1560
      "\xcf\xb2"         => "\xcf\xb9",
1561
      "\xcf\xb1"         => "\xce\xa1",
1562
      "\xcf\xb0"         => "\xce\x9a",
1563
      "\xcf\xaf"         => "\xcf\xae",
1564
      "\xcf\xad"         => "\xcf\xac",
1565
      "\xcf\xab"         => "\xcf\xaa",
1566
      "\xcf\xa9"         => "\xcf\xa8",
1567
      "\xcf\xa7"         => "\xcf\xa6",
1568
      "\xcf\xa5"         => "\xcf\xa4",
1569
      "\xcf\xa3"         => "\xcf\xa2",
1570
      "\xcf\xa1"         => "\xcf\xa0",
1571
      "\xcf\x9f"         => "\xcf\x9e",
1572
      "\xcf\x9d"         => "\xcf\x9c",
1573
      "\xcf\x9b"         => "\xcf\x9a",
1574
      "\xcf\x99"         => "\xcf\x98",
1575
      "\xcf\x97"         => "\xcf\x8f",
1576
      "\xcf\x96"         => "\xce\xa0",
1577
      "\xcf\x95"         => "\xce\xa6",
1578
      "\xcf\x91"         => "\xce\x98",
1579
      "\xcf\x90"         => "\xce\x92",
1580
      "\xcf\x8e"         => "\xce\x8f",
1581
      "\xcf\x8d"         => "\xce\x8e",
1582
      "\xcf\x8c"         => "\xce\x8c",
1583
      "\xcf\x8b"         => "\xce\xab",
1584
      "\xcf\x8a"         => "\xce\xaa",
1585
      "\xcf\x89"         => "\xce\xa9",
1586
      "\xcf\x88"         => "\xce\xa8",
1587
      "\xcf\x87"         => "\xce\xa7",
1588
      "\xcf\x86"         => "\xce\xa6",
1589
      "\xcf\x85"         => "\xce\xa5",
1590
      "\xcf\x84"         => "\xce\xa4",
1591
      "\xcf\x83"         => "\xce\xa3",
1592
      "\xcf\x82"         => "\xce\xa3",
1593
      "\xcf\x81"         => "\xce\xa1",
1594
      "\xcf\x80"         => "\xce\xa0",
1595
      "\xce\xbf"         => "\xce\x9f",
1596
      "\xce\xbe"         => "\xce\x9e",
1597
      "\xce\xbd"         => "\xce\x9d",
1598
      "\xce\xbc"         => "\xce\x9c",
1599
      "\xce\xbb"         => "\xce\x9b",
1600
      "\xce\xba"         => "\xce\x9a",
1601
      "\xce\xb9"         => "\xce\x99",
1602
      "\xce\xb8"         => "\xce\x98",
1603
      "\xce\xb7"         => "\xce\x97",
1604
      "\xce\xb6"         => "\xce\x96",
1605
      "\xce\xb5"         => "\xce\x95",
1606
      "\xce\xb4"         => "\xce\x94",
1607
      "\xce\xb3"         => "\xce\x93",
1608
      "\xce\xb2"         => "\xce\x92",
1609
      "\xce\xb1"         => "\xce\x91",
1610
      "\xce\xaf"         => "\xce\x8a",
1611
      "\xce\xae"         => "\xce\x89",
1612
      "\xce\xad"         => "\xce\x88",
1613
      "\xce\xac"         => "\xce\x86",
1614
      "\xcd\xbd"         => "\xcf\xbf",
1615
      "\xcd\xbc"         => "\xcf\xbe",
1616
      "\xcd\xbb"         => "\xcf\xbd",
1617
      "\xcd\xb7"         => "\xcd\xb6",
1618
      "\xcd\xb3"         => "\xcd\xb2",
1619
      "\xcd\xb1"         => "\xcd\xb0",
1620
      "\xca\x92"         => "\xc6\xb7",
1621
      "\xca\x8c"         => "\xc9\x85",
1622
      "\xca\x8b"         => "\xc6\xb2",
1623
      "\xca\x8a"         => "\xc6\xb1",
1624
      "\xca\x89"         => "\xc9\x84",
1625
      "\xca\x88"         => "\xc6\xae",
1626
      "\xca\x83"         => "\xc6\xa9",
1627
      "\xca\x80"         => "\xc6\xa6",
1628
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1629
      "\xc9\xb5"         => "\xc6\x9f",
1630
      "\xc9\xb2"         => "\xc6\x9d",
1631
      "\xc9\xb1"         => "\xe2\xb1\xae",
1632
      "\xc9\xaf"         => "\xc6\x9c",
1633
      "\xc9\xab"         => "\xe2\xb1\xa2",
1634
      "\xc9\xa9"         => "\xc6\x96",
1635
      "\xc9\xa8"         => "\xc6\x97",
1636
      "\xc9\xa5"         => "\xea\x9e\x8d",
1637
      "\xc9\xa3"         => "\xc6\x94",
1638
      "\xc9\xa0"         => "\xc6\x93",
1639
      "\xc9\x9b"         => "\xc6\x90",
1640
      "\xc9\x99"         => "\xc6\x8f",
1641
      "\xc9\x97"         => "\xc6\x8a",
1642
      "\xc9\x96"         => "\xc6\x89",
1643
      "\xc9\x94"         => "\xc6\x86",
1644
      "\xc9\x93"         => "\xc6\x81",
1645
      "\xc9\x92"         => "\xe2\xb1\xb0",
1646
      "\xc9\x91"         => "\xe2\xb1\xad",
1647
      "\xc9\x90"         => "\xe2\xb1\xaf",
1648
      "\xc9\x8f"         => "\xc9\x8e",
1649
      "\xc9\x8d"         => "\xc9\x8c",
1650
      "\xc9\x8b"         => "\xc9\x8a",
1651
      "\xc9\x89"         => "\xc9\x88",
1652
      "\xc9\x87"         => "\xc9\x86",
1653
      "\xc9\x82"         => "\xc9\x81",
1654
      "\xc9\x80"         => "\xe2\xb1\xbf",
1655
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1656
      "\xc8\xbc"         => "\xc8\xbb",
1657
      "\xc8\xb3"         => "\xc8\xb2",
1658
      "\xc8\xb1"         => "\xc8\xb0",
1659
      "\xc8\xaf"         => "\xc8\xae",
1660
      "\xc8\xad"         => "\xc8\xac",
1661
      "\xc8\xab"         => "\xc8\xaa",
1662
      "\xc8\xa9"         => "\xc8\xa8",
1663
      "\xc8\xa7"         => "\xc8\xa6",
1664
      "\xc8\xa5"         => "\xc8\xa4",
1665
      "\xc8\xa3"         => "\xc8\xa2",
1666
      "\xc8\x9f"         => "\xc8\x9e",
1667
      "\xc8\x9d"         => "\xc8\x9c",
1668
      "\xc8\x9b"         => "\xc8\x9a",
1669
      "\xc8\x99"         => "\xc8\x98",
1670
      "\xc8\x97"         => "\xc8\x96",
1671
      "\xc8\x95"         => "\xc8\x94",
1672
      "\xc8\x93"         => "\xc8\x92",
1673
      "\xc8\x91"         => "\xc8\x90",
1674
      "\xc8\x8f"         => "\xc8\x8e",
1675
      "\xc8\x8d"         => "\xc8\x8c",
1676
      "\xc8\x8b"         => "\xc8\x8a",
1677
      "\xc8\x89"         => "\xc8\x88",
1678
      "\xc8\x87"         => "\xc8\x86",
1679
      "\xc8\x85"         => "\xc8\x84",
1680
      "\xc8\x83"         => "\xc8\x82",
1681
      "\xc8\x81"         => "\xc8\x80",
1682
      "\xc7\xbf"         => "\xc7\xbe",
1683
      "\xc7\xbd"         => "\xc7\xbc",
1684
      "\xc7\xbb"         => "\xc7\xba",
1685
      "\xc7\xb9"         => "\xc7\xb8",
1686
      "\xc7\xb5"         => "\xc7\xb4",
1687
      "\xc7\xb3"         => "\xc7\xb2",
1688
      "\xc7\xaf"         => "\xc7\xae",
1689
      "\xc7\xad"         => "\xc7\xac",
1690
      "\xc7\xab"         => "\xc7\xaa",
1691
      "\xc7\xa9"         => "\xc7\xa8",
1692
      "\xc7\xa7"         => "\xc7\xa6",
1693
      "\xc7\xa5"         => "\xc7\xa4",
1694
      "\xc7\xa3"         => "\xc7\xa2",
1695
      "\xc7\xa1"         => "\xc7\xa0",
1696
      "\xc7\x9f"         => "\xc7\x9e",
1697
      "\xc7\x9d"         => "\xc6\x8e",
1698
      "\xc7\x9c"         => "\xc7\x9b",
1699
      "\xc7\x9a"         => "\xc7\x99",
1700
      "\xc7\x98"         => "\xc7\x97",
1701
      "\xc7\x96"         => "\xc7\x95",
1702
      "\xc7\x94"         => "\xc7\x93",
1703
      "\xc7\x92"         => "\xc7\x91",
1704
      "\xc7\x90"         => "\xc7\x8f",
1705
      "\xc7\x8e"         => "\xc7\x8d",
1706
      "\xc7\x8c"         => "\xc7\x8b",
1707
      "\xc7\x89"         => "\xc7\x88",
1708
      "\xc7\x86"         => "\xc7\x85",
1709
      "\xc6\xbf"         => "\xc7\xb7",
1710
      "\xc6\xbd"         => "\xc6\xbc",
1711
      "\xc6\xb9"         => "\xc6\xb8",
1712
      "\xc6\xb6"         => "\xc6\xb5",
1713
      "\xc6\xb4"         => "\xc6\xb3",
1714
      "\xc6\xb0"         => "\xc6\xaf",
1715
      "\xc6\xad"         => "\xc6\xac",
1716
      "\xc6\xa8"         => "\xc6\xa7",
1717
      "\xc6\xa5"         => "\xc6\xa4",
1718
      "\xc6\xa3"         => "\xc6\xa2",
1719
      "\xc6\xa1"         => "\xc6\xa0",
1720
      "\xc6\x9e"         => "\xc8\xa0",
1721
      "\xc6\x9a"         => "\xc8\xbd",
1722
      "\xc6\x99"         => "\xc6\x98",
1723
      "\xc6\x95"         => "\xc7\xb6",
1724
      "\xc6\x92"         => "\xc6\x91",
1725
      "\xc6\x8c"         => "\xc6\x8b",
1726
      "\xc6\x88"         => "\xc6\x87",
1727
      "\xc6\x85"         => "\xc6\x84",
1728
      "\xc6\x83"         => "\xc6\x82",
1729
      "\xc6\x80"         => "\xc9\x83",
1730
      "\xc5\xbf"         => "\x53",
1731
      "\xc5\xbe"         => "\xc5\xbd",
1732
      "\xc5\xbc"         => "\xc5\xbb",
1733
      "\xc5\xba"         => "\xc5\xb9",
1734
      "\xc5\xb7"         => "\xc5\xb6",
1735
      "\xc5\xb5"         => "\xc5\xb4",
1736
      "\xc5\xb3"         => "\xc5\xb2",
1737
      "\xc5\xb1"         => "\xc5\xb0",
1738
      "\xc5\xaf"         => "\xc5\xae",
1739
      "\xc5\xad"         => "\xc5\xac",
1740
      "\xc5\xab"         => "\xc5\xaa",
1741
      "\xc5\xa9"         => "\xc5\xa8",
1742
      "\xc5\xa7"         => "\xc5\xa6",
1743
      "\xc5\xa5"         => "\xc5\xa4",
1744
      "\xc5\xa3"         => "\xc5\xa2",
1745
      "\xc5\xa1"         => "\xc5\xa0",
1746
      "\xc5\x9f"         => "\xc5\x9e",
1747
      "\xc5\x9d"         => "\xc5\x9c",
1748
      "\xc5\x9b"         => "\xc5\x9a",
1749
      "\xc5\x99"         => "\xc5\x98",
1750
      "\xc5\x97"         => "\xc5\x96",
1751
      "\xc5\x95"         => "\xc5\x94",
1752
      "\xc5\x93"         => "\xc5\x92",
1753
      "\xc5\x91"         => "\xc5\x90",
1754
      "\xc5\x8f"         => "\xc5\x8e",
1755
      "\xc5\x8d"         => "\xc5\x8c",
1756
      "\xc5\x8b"         => "\xc5\x8a",
1757
      "\xc5\x88"         => "\xc5\x87",
1758
      "\xc5\x86"         => "\xc5\x85",
1759
      "\xc5\x84"         => "\xc5\x83",
1760
      "\xc5\x82"         => "\xc5\x81",
1761
      "\xc5\x80"         => "\xc4\xbf",
1762
      "\xc4\xbe"         => "\xc4\xbd",
1763
      "\xc4\xbc"         => "\xc4\xbb",
1764
      "\xc4\xba"         => "\xc4\xb9",
1765
      "\xc4\xb7"         => "\xc4\xb6",
1766
      "\xc4\xb5"         => "\xc4\xb4",
1767
      "\xc4\xb3"         => "\xc4\xb2",
1768
      "\xc4\xb1"         => "\x49",
1769
      "\xc4\xaf"         => "\xc4\xae",
1770
      "\xc4\xad"         => "\xc4\xac",
1771
      "\xc4\xab"         => "\xc4\xaa",
1772
      "\xc4\xa9"         => "\xc4\xa8",
1773
      "\xc4\xa7"         => "\xc4\xa6",
1774
      "\xc4\xa5"         => "\xc4\xa4",
1775
      "\xc4\xa3"         => "\xc4\xa2",
1776
      "\xc4\xa1"         => "\xc4\xa0",
1777
      "\xc4\x9f"         => "\xc4\x9e",
1778
      "\xc4\x9d"         => "\xc4\x9c",
1779
      "\xc4\x9b"         => "\xc4\x9a",
1780
      "\xc4\x99"         => "\xc4\x98",
1781
      "\xc4\x97"         => "\xc4\x96",
1782
      "\xc4\x95"         => "\xc4\x94",
1783
      "\xc4\x93"         => "\xc4\x92",
1784
      "\xc4\x91"         => "\xc4\x90",
1785
      "\xc4\x8f"         => "\xc4\x8e",
1786
      "\xc4\x8d"         => "\xc4\x8c",
1787
      "\xc4\x8b"         => "\xc4\x8a",
1788
      "\xc4\x89"         => "\xc4\x88",
1789
      "\xc4\x87"         => "\xc4\x86",
1790
      "\xc4\x85"         => "\xc4\x84",
1791
      "\xc4\x83"         => "\xc4\x82",
1792
      "\xc4\x81"         => "\xc4\x80",
1793
      "\xc3\xbf"         => "\xc5\xb8",
1794
      "\xc3\xbe"         => "\xc3\x9e",
1795
      "\xc3\xbd"         => "\xc3\x9d",
1796
      "\xc3\xbc"         => "\xc3\x9c",
1797
      "\xc3\xbb"         => "\xc3\x9b",
1798
      "\xc3\xba"         => "\xc3\x9a",
1799
      "\xc3\xb9"         => "\xc3\x99",
1800
      "\xc3\xb8"         => "\xc3\x98",
1801
      "\xc3\xb6"         => "\xc3\x96",
1802
      "\xc3\xb5"         => "\xc3\x95",
1803
      "\xc3\xb4"         => "\xc3\x94",
1804
      "\xc3\xb3"         => "\xc3\x93",
1805
      "\xc3\xb2"         => "\xc3\x92",
1806
      "\xc3\xb1"         => "\xc3\x91",
1807
      "\xc3\xb0"         => "\xc3\x90",
1808
      "\xc3\xaf"         => "\xc3\x8f",
1809
      "\xc3\xae"         => "\xc3\x8e",
1810
      "\xc3\xad"         => "\xc3\x8d",
1811
      "\xc3\xac"         => "\xc3\x8c",
1812
      "\xc3\xab"         => "\xc3\x8b",
1813
      "\xc3\xaa"         => "\xc3\x8a",
1814
      "\xc3\xa9"         => "\xc3\x89",
1815
      "\xc3\xa8"         => "\xc3\x88",
1816
      "\xc3\xa7"         => "\xc3\x87",
1817
      "\xc3\xa6"         => "\xc3\x86",
1818
      "\xc3\xa5"         => "\xc3\x85",
1819
      "\xc3\xa4"         => "\xc3\x84",
1820
      "\xc3\xa3"         => "\xc3\x83",
1821
      "\xc3\xa2"         => "\xc3\x82",
1822
      "\xc3\xa1"         => "\xc3\x81",
1823
      "\xc3\xa0"         => "\xc3\x80",
1824
      "\xc2\xb5"         => "\xce\x9c",
1825
      "\x7a"             => "\x5a",
1826
      "\x79"             => "\x59",
1827
      "\x78"             => "\x58",
1828
      "\x77"             => "\x57",
1829
      "\x76"             => "\x56",
1830
      "\x75"             => "\x55",
1831
      "\x74"             => "\x54",
1832
      "\x73"             => "\x53",
1833
      "\x72"             => "\x52",
1834
      "\x71"             => "\x51",
1835
      "\x70"             => "\x50",
1836
      "\x6f"             => "\x4f",
1837
      "\x6e"             => "\x4e",
1838
      "\x6d"             => "\x4d",
1839
      "\x6c"             => "\x4c",
1840
      "\x6b"             => "\x4b",
1841
      "\x6a"             => "\x4a",
1842
      "\x69"             => "\x49",
1843
      "\x68"             => "\x48",
1844
      "\x67"             => "\x47",
1845
      "\x66"             => "\x46",
1846
      "\x65"             => "\x45",
1847
      "\x64"             => "\x44",
1848
      "\x63"             => "\x43",
1849
      "\x62"             => "\x42",
1850
      "\x61"             => "\x41",
1851
1852
    );
1853
1854
    return $case;
1855
  }
1856
1857
  /**
   * Probes the UTF-8 related features once and caches the results in self::$support.
   *
   * The probe only runs while self::$support['mbstring'] is not set; every
   * later call returns immediately.
   */
  public static function checkForSupport()
  {
    // already probed => nothing to do
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1870 157
1871
  /**
   * Builds the UTF-8 encoded character for a code point.
   *
   * @param    int|string $code_point The code point; a value that is not already an integer
   *                                  is passed through self::hex_to_int() first.
   *
   * @return   string The character as produced by self::html_entity_decode(), or an empty
   *                  string when the code point resolves to 0.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $number = (int)$code_point;
    if ($number !== $code_point) {
      // not a plain integer (e.g. a string) => let hex_to_int() interpret the input
      $number = (int)self::hex_to_int($code_point);
    }

    if ($number === 0) {
      return '';
    }

    // decode the numeric entity "&#<number>;" into the actual character
    return self::html_entity_decode('&#' . $number . ';', ENT_QUOTES);
  }
1894
1895
  /**
   * Runs a callback over every character of a string.
   *
   * @param    callable $callback Invoked once per character, with that character as its argument.
   * @param    string   $str      UTF-8 string to run the callback on.
   *
   * @return   array The callback results, one entry per element returned by self::split().
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1910
1911
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character; an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // test for emptiness, not truthiness: "0" is a valid one-character string
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1931
1932
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are combined (1 to 4). A lead byte
   * that matches none of the 2-, 3- or 4-byte markers is returned as-is.
   *
   * @param   string $chr The input character
   *
   * @return  int The decoded value; 0 when the input is an empty string.
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;

    // no byte to inspect => do not read an uninitialized string offset
    if (!isset($chr[0])) {
      return 0;
    }

    $code = self::ord($chr[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // strip the length marker bits from the lead byte, keep its payload bits
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    // append the payload of each following byte
    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      $code = ($code << 6) + (self::ord($chr[$i - 1]) & ~0x80);
    }

    return $code;
  }
1971
1972
  /**
   * Get the hexadecimal code point of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character
   * @param    string $pfix Prefix handed to self::int_to_hex(), "U+" by default.
   *
   * @return   string The code point as formatted by self::int_to_hex() (U+xxxx with the default prefix)
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $codePoint = self::ord($chr);

    return self::int_to_hex($codePoint, $pfix);
  }
1984
1985
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from self::split() and are glued together with implode(),
   * so $end is placed between chunks and not after the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) inserted between two chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1999
2000
  /**
   * Strips bytes that do not belong to a well-formed UTF-8 sequence from a string.
   *
   * Runs of structurally well-formed 1-4 byte sequences are kept, stray
   * continuation or lead bytes are dropped. Afterwards diamond question marks
   * and invisible characters are removed; the remaining steps are opt-in and
   * only run when the matching flag is exactly true.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              true => also run self::removeBOM()
   * @param bool   $normalize_whitespace    true => also run self::normalize_whitespace()
   * @param bool   $normalize_msword        true => also run self::normalize_msword(), e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space forwarded to self::normalize_whitespace();
   *                                        set true, to keep non-breaking-spaces
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // (the unbounded variant caused connection reset problems on larger strings,
    //  hence the {1,100} repetition limit)
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // keep only the first capture group, i.e. the well-formed sequences
    $str = preg_replace($pattern, '$1', $str);

    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    if (true === $normalize_whitespace) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $str = self::normalize_msword($str);
    }

    if (true === $remove_bom) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
2046
2047
  /**
   * Cleans up a string so that only printable UTF-8 characters remain.
   *
   * @param string|false $str
   *
   * @return string The cleaned string; an empty string when the input casts to an empty string.
   */
  public static function cleanup($str)
  {
    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    // repair simple ISO <-> UTF-8 errors first
    $text = self::fix_simple_utf8($text);

    // clean() removes non-UTF-8 bytes, diamond question marks and invisible
    // characters (e.g. "\0"); the flags additionally:
    //   - remove the BOM
    //   - normalize whitespace
    //   - leave MS Word characters untouched
    //   - keep non-breaking spaces
    return (string)self::clean($text, true, true, false, true);
  }
2074
2075
  /**
   * Converts a string, or an array of characters, into Unicode code points.
   *
   * @param    mixed $arg     A UTF-8 encoded string (it is split via self::split())
   *                          or an array of such strings (used as given).
   * @param    bool  $u_style If truthy, every code point is additionally passed through
   *                          self::int_to_hex() (U+xxxx format); by default the
   *                          code points are returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $class = '\\voku\\helper\\UTF8';

    $chars = is_string($arg) ? self::split($arg) : $arg;

    $points = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    return array_map(array($class, 'int_to_hex'), $points);
  }
2110
2111
  /**
   * Counts how often each character occurs in a string.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array with the characters as keys and their
   *           number of occurrences as values, sorted by key.
   */
  public static function count_chars($str) // there is no $mode parameter
  {
    $occurrences = array_count_values(self::split($str));

    ksort($occurrences);

    return $occurrences;
  }
2127
2128
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * @param   int $code Code.
   *
   * @return  string The result of converting the matching numeric HTML entity to UTF-8.
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    // numeric HTML entity in hexadecimal notation, e.g. 8364 => "&#x20ac;"
    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2145
2146
  /**
   * encode a string
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function on a UTF-8 string as well without messing up the string.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
   *                         otherwise we auto-detect the current string-encoding
   *
   * @return string The converted string, or the (string-cast) input when nothing was converted.
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do for an empty string or an empty target encoding
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // Skip the conversion when no encoding was detected (loose check: both false
    // and an empty string count as "not detected"), or when the string already has
    // the wanted encoding and re-encoding is not forced.
    if (!$encodingDetected || ($force !== true && $encodingDetected === $encoding)) {
      return $str;
    }

    self::checkForSupport();

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_win1252($str);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // Accept every non-empty string result. A plain truthiness test would throw
    // away the valid result "0" and hand back the unconverted input instead.
    if (is_string($strEncoded) && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
2220
2221
  /**
   * Callback for preg_replace_callback(): converts the matched HTML entity to UTF-8.
   *
   * @param  array $matches PREG matches; only the full match ($matches[0]) is used.
   *
   * @return string The converted text; a result that is a single quote is
   *                returned as the entity "&#x27;" instead.
   */
  protected static function entityCallback($matches)
  {
    self::checkForSupport();

    $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    return $decoded === "'" ? '&#x27;' : $decoded;
  }
2240
2241
  /**
   * Reads entire file into a string, optionally converting the content to clean UTF-8.
   *
   * WARNING: do not use the UTF-8 option for binary files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string   $filename      <p>
   *                                Name of the file to read. It is passed through
   *                                filter_var(..., FILTER_SANITIZE_STRING) before use.
   *                                </p>
   * @param int      $flags         [optional] <p>
   *                                Forwarded unchanged as the second argument of PHP's
   *                                file_get_contents(), e.g. FILE_USE_INCLUDE_PATH to search
   *                                for the file in the include path.
   *                                </p>
   * @param resource $context       [optional] <p>
   *                                A valid context resource created with stream_context_create.
   *                                When it is null and $timeout is non-zero, a context with the
   *                                "http" option "timeout" is created.
   *                                </p>
   * @param int      $offset        [optional] <p>
   *                                The offset where the reading starts.
   *                                </p>
   * @param int      $maxlen        [optional] <p>
   *                                Maximum length of data read. It is only forwarded when it is
   *                                an integer; otherwise the file is read until its end.
   *                                </p>
   * @param int      $timeout       Timeout for the automatically created context (cast to int).
   *
   * @param boolean  $convertToUtf8 Only when exactly true the data is passed through
   *                                self::encode('UTF-8', ...) and self::cleanup().
   *                                WARNING: maybe you can't use this option for images or pdf,
   *                                because they use non default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // no context given => create one that carries the timeout
    if ($timeout && $context === null) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    // forward $maxlen only when it is a real integer
    $data = is_int($maxlen)
        ? file_get_contents($filename, $flags, $context, $offset, $maxlen)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    // raw data requested
    if ($convertToUtf8 !== true) {
      return $data;
    }

    self::checkForSupport();

    // convert to UTF-8 (encoding is auto-detected, not forced), then clean the string
    return self::cleanup(self::encode('UTF-8', $data, false));
  }
2361 7
2362 7
  /**
2363
   * Checks if a file starts with BOM character.
2364 7
   *
2365
   * @param    string $file_path Path to a valid file.
2366
   *
2367
   * @return   bool True if the file has BOM at the start, False otherwise.
2368
   */
2369
  public static function file_has_bom($file_path)
  {
    // Read just the first three bytes of the file for the BOM check.
    // The offset has to be 0: since PHP 7.1 a negative offset counts from the
    // end of the stream, so "-1" would hand the last byte to is_bom() instead.
    $head = file_get_contents($file_path, false, null, 0, 3);

    // An unreadable file cannot be said to start with a BOM.
    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
2373
2374
  /**
2375
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2376
   *
2377
   * @param mixed  $var
2378
   * @param int    $normalization_form
2379
   * @param string $leading_combining
2380
   *
2381
   * @return mixed
2382
   */
2383
  public static function filter($var, $normalization_form = \Normalizer::NFC, $leading_combining = '◌')
  {
    // The default form is taken from the Normalizer constant instead of the
    // literal 4: the numeric value of NFC differs between intl versions, and
    // 4 means NFD on newer ones.
    switch (gettype($var)) {
      case 'array':
        // Filter every element in place, recursing into nested values.
        foreach ($var as $k => $v) {
          /** @noinspection AlterInForeachInspection */
          $var[$k] = self::filter($v, $normalization_form, $leading_combining);
        }
        break;
      case 'object':
        // Filter every iterable property; the object itself is modified.
        foreach ($var as $k => $v) {
          $var->$k = self::filter($v, $normalization_form, $leading_combining);
        }
        break;
      case 'string':
        if (false !== strpos($var, "\r")) {
          // Workaround https://bugs.php.net/65732
          $var = str_replace(array("\r\n", "\r"), "\n", $var);
        }
        // Only strings containing non-ASCII bytes need normalization.
        if (preg_match('/[\x80-\xFF]/', $var)) {
          if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Non-empty marker so the isset($n[0]) check below passes
            // for input that was already normalized.
            $n = '-';
          } else {
            $n = \Normalizer::normalize($var, $normalization_form);

            if (isset($n[0])) {
              $var = $n;
            } else {
              // Normalization produced nothing usable: re-encode as UTF-8.
              $var = self::encode('UTF-8', $var);
            }

          }
          if ($var[0] >= "\x80" && isset($n[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
          }
        }
        break;
    }

    // Every other type is returned untouched.
    return $var;
  }
2426
2427
  /**
2428
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2429
   *
2430
   * @param int    $type
2431
   * @param string $var
2432
   * @param int    $filter
2433
   * @param mixed  $option
2434
   *
2435
   * @return mixed
2436
   */
2437 1 View Code Duplication
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Hand $option to the native function only when the caller really passed
    // a fourth argument; otherwise the native default applies.
    $value = (func_num_args() >= 4)
        ? filter_input($type, $var, $filter, $option)
        : filter_input($type, $var, $filter);

    // Run the native result through self::filter() before returning it.
    return self::filter($value);
  }
2447
2448
  /**
2449
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2450
   *
2451
   * @param int   $type
2452
   * @param mixed $definition
2453
   * @param bool  $add_empty
2454
   *
2455
   * @return mixed
2456
   */
2457 1 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With fewer than two arguments only $type is forwarded, so the native
    // defaults for the definition and the add-empty flag stay in effect.
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    // Run the native result through self::filter() before returning it.
    return self::filter($values);
  }
2467
2468
  /**
2469 8
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2470
   *
2471 8
   * @param mixed $var
2472 8
   * @param int   $filter
2473
   * @param mixed $option
2474 8
   *
2475
   * @return mixed
2476 8
   */
2477 2 View Code Duplication
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Hand $option to the native function only when the caller really passed
    // a third argument; otherwise the native default applies.
    $value = (func_num_args() >= 3)
        ? filter_var($var, $filter, $option)
        : filter_var($var, $filter);

    // Run the native result through self::filter() before returning it.
    return self::filter($value);
  }
2487
2488
  /**
2489
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2490
   *
2491
   * @param array $data
2492
   * @param mixed $definition
2493
   * @param bool  $add_empty
2494
   *
2495 1
   * @return mixed
2496
   */
2497 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With fewer than two arguments only $data is forwarded, so the native
    // defaults for the definition and the add-empty flag stay in effect.
    $values = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // Run the native result through self::filter() before returning it.
    return self::filter($values);
  }
2507 1
2508 1
  /**
2509 1
   * Checks if the number of Unicode characters in a string are not
2510 1
   * more than the specified integer.
2511 1
   *
2512
   * @param    string $str      The original string to be checked.
2513 1
   * @param    int    $box_size The size in number of chars to be checked against string.
2514
   *
2515
   * @return   bool true if string is less than or equal to $box_size, false otherwise.
2516
   */
2517
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by self::strlen(), compared against the box size.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2521
2522
  /**
2523 1
   * Fixing a broken UTF-8 string.
2524
   *
2525 1
   * @param string $str
2526
   *
2527 1
   * @return string
2528 1
   */
2529
  public static function fix_simple_utf8($str)
  {
    // Search/replace lists are derived once from self::$brokenUtf8ToUtf8
    // and then reused by every later call.
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    // Nothing to repair in an empty string.
    if ($str === '') {
      return '';
    }

    if (null === $search) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2547
2548
  /**
2549
   * Fix a double (or multiple) encoded UTF8 string.
2550
   *
2551
   * @param array|string $str
2552
   *
2553
   * @return array|string
2554
   */
2555
  public static function fix_utf8($str)
  {
    // Arrays are handled element by element, keeping their keys.
    if (is_array($str)) {
      $fixed = array();

      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Keep decoding and re-encoding until a pass no longer changes the value.
    for ($previous = ''; $previous !== $str;) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($str));
    }

    return $str;
  }
2575
2576
  /**
2577
   * Get the text direction of a specific character.
2578
   *
2579
   * @param   string $chr Character.
2580
   *
2581
   * @return  string 'RTL' or 'LTR'
2582
   */
2583
  public static function getCharDirection($chr)
  {
    $code = static::chr_to_decimal($chr);

    // Everything outside the overall window covered by the tables is 'LTR'.
    if (!($code >= 0x5be && $code <= 0x10b7f)) {
      return 'LTR';
    }

    if ($code <= 0x85e) {

      // Lower block: single code points and inclusive ranges reported as 'RTL'.
      $singles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif ($code === 0x200f) {

      return 'RTL';

    } elseif ($code >= 0xfb1d) {

      // Upper block: single code points and inclusive ranges reported as 'RTL'.
      $singles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // Between the two blocks only 0x200f (handled above) is 'RTL'.
      return 'LTR';

    }

    // Single code points are matched strictly (type and value).
    if (in_array($code, $singles, true)) {
      return 'RTL';
    }

    // Ranges are inclusive on both ends.
    foreach ($ranges as $range) {
      if ($range[0] <= $code && $range[1] >= $code) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2675 1
2676 1
  /**
2677
   * Get data from "/data/*.php".
2678 1
   *
2679
   * @param string $file
2680
   *
2681
   * @return bool|string|array|int false on error
2682
   */
2683
  protected static function getData($file)
  {
    // Resolve the short name to the PHP data file next to this class.
    $file = __DIR__ . '/data/' . $file . '.php';

    // A missing data file is reported as false.
    if (!file_exists($file)) {
      return false;
    }

    // The return value is whatever the required file returns.
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2693
2694
  /**
2695
   * Creates a random string of UTF-8 characters.
2696
   *
2697
   * @param    int $len The length of string in characters.
2698
   *
2699
   * @return   string String consisting of random characters.
2700
   */
2701
  public static function hash($len = 8)
  {
    // The character pool is built once and cached across calls.
    static $chars = array();
    static $chars_len = null;

    // Work with a whole number of characters: a fractional or non-numeric
    // length would never reach exactly zero in the countdown loop below.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (empty($chars)) {
      if (self::$support['pcre_utf8'] === true) {
        // Candidate characters for the code points 48..79.
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // Blank out everything that is not a number or an upper/lower-case letter.
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // Remove the blanked-out entries. Filtering by length (rather than by
        // truthiness) keeps the character "0", which is falsy in PHP.
        $chars = array_values(array_filter($chars, 'strlen'));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    // NOTE(review): mt_rand() is not cryptographically secure; do not use the
    // result for secrets or tokens.
    for (; $len > 0; --$len) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
2741
2742
  /**
2743
   * Converts hexadecimal U+xxxx code point representation to Integer.
2744
   *
2745
   * INFO: opposite to UTF8::int_to_hex( )
2746
   *
2747
   * @param    string $str The hexadecimal code point representation.
2748
   *
2749
   * @return   int The code point, or 0 on failure.
2750
   */
2751
  public static function hex_to_int($str)
  {
    // Accept an optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal
    // digits. Only real hex digits are allowed, so input such as "12g4" is
    // rejected instead of being partially parsed by intval().
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    // Not a recognizable code point representation.
    return 0;
  }
2759
2760
  /**
2761
   * Converts a UTF-8 string to a series of HTML numbered entities.
2762
   *
2763
   * e.g.: &#123;&#39;&#1740;
2764
   *
2765
   * @param  string $str The Unicode string to be encoded as numbered entities.
2766
   *
2767
   * @return string HTML numbered entities.
2768
   */
2769
  public static function html_encode($str)
  {
    // Pieces produced by self::split(); each one is encoded on its own.
    $pieces = self::split($str);

    $entities = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'single_chr_html_encode',
        ),
        $pieces
    );

    // Glue the encoded pieces back together without a separator.
    return implode($entities);
  }
2781
2782
  /**
2783
   * UTF-8 version of html_entity_decode()
2784
   *
2785
   * The reason we are not using html_entity_decode() by itself is because
2786
   * while it is not technically correct to leave out the semicolon
2787
   * at the end of an entity most browsers will still interpret the entity
2788
   * correctly. html_entity_decode() does not convert entities without
2789
   * semicolons, so we are left with our own little solution here. Bummer.
2790 15
   *
2791
   * Convert all HTML entities to their applicable characters
2792 15
   *
2793
   * @link http://php.net/manual/en/function.html-entity-decode.php
2794 15
   *
2795 3
   * @param string $str      <p>
2796
   *                         The input string.
2797
   *                         </p>
2798 15
   * @param int    $flags    [optional] <p>
2799 4
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2800
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2801
   *                         <table>
2802 15
   *                         Available <i>flags</i> constants
2803 3
   *                         <tr valign="top">
2804 3
   *                         <td>Constant Name</td>
2805 3
   *                         <td>Description</td>
2806
   *                         </tr>
2807
   *                         <tr valign="top">
2808 3
   *                         <td><b>ENT_COMPAT</b></td>
2809
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2810
   *                         </tr>
2811 15
   *                         <tr valign="top">
2812
   *                         <td><b>ENT_QUOTES</b></td>
2813 15
   *                         <td>Will convert both double and single quotes.</td>
2814
   *                         </tr>
2815
   *                         <tr valign="top">
2816 15
   *                         <td><b>ENT_NOQUOTES</b></td>
2817 15
   *                         <td>Will leave both double and single quotes unconverted.</td>
2818 15
   *                         </tr>
2819
   *                         <tr valign="top">
2820 15
   *                         <td><b>ENT_HTML401</b></td>
2821
   *                         <td>
2822 15
   *                         Handle code as HTML 4.01.
2823
   *                         </td>
2824 15
   *                         </tr>
2825
   *                         <tr valign="top">
2826
   *                         <td><b>ENT_XML1</b></td>
2827
   *                         <td>
2828
   *                         Handle code as XML 1.
2829
   *                         </td>
2830
   *                         </tr>
2831
   *                         <tr valign="top">
2832
   *                         <td><b>ENT_XHTML</b></td>
2833
   *                         <td>
2834 12
   *                         Handle code as XHTML.
2835
   *                         </td>
2836 12
   *                         </tr>
2837
   *                         <tr valign="top">
2838 12
   *                         <td><b>ENT_HTML5</b></td>
2839
   *                         <td>
2840 12
   *                         Handle code as HTML 5.
2841 5
   *                         </td>
2842
   *                         </tr>
2843
   *                         </table>
2844 11
   *                         </p>
2845
   * @param string $encoding [optional] <p>
2846
   *                         Encoding to use.
2847
   *                         </p>
2848
   *
2849
   * @return string the decoded string.
2850
   */
2851
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Empty input: nothing to decode.
    if ($str === '') {
      return '';
    }

    // Without an ampersand there cannot be any entity in the string.
    if (false === strpos($str, '&')) {
      return $str;
    }

    // Default flags: add ENT_HTML5 when Bootup reports PHP 5.4 support.
    if (null === $flags) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Repeat until a full pass leaves the string unchanged, so that nested
    // (multiply encoded) entities are resolved as well.
    do {
      $before = $str;

      // Decimal entities of 2-5 digits are handled by self::entityCallback().
      $str = preg_replace_callback("/&#\d{2,5};/", array('\voku\helper\UTF8', 'entityCallback'), $str);

      // Append the missing semicolon to numeric entities written without one ...
      $terminated = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      // ... then decode numeric & UTF16 two byte entities
      $str = html_entity_decode($terminated, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2887
2888
  /**
2889
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2890
   *
2891
   * @link http://php.net/manual/en/function.htmlentities.php
2892
   *
2893
   * @param string $str           <p>
2894
   *                              The input string.
2895
   *                              </p>
2896
   * @param int    $flags         [optional] <p>
2897
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2898
   *                              invalid code unit sequences and the used document type. The default is
2899
   *                              ENT_COMPAT | ENT_HTML401.
2900
   *                              <table>
2901
   *                              Available <i>flags</i> constants
2902
   *                              <tr valign="top">
2903
   *                              <td>Constant Name</td>
2904
   *                              <td>Description</td>
2905
   *                              </tr>
2906
   *                              <tr valign="top">
2907
   *                              <td><b>ENT_COMPAT</b></td>
2908
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2909
   *                              </tr>
2910
   *                              <tr valign="top">
2911
   *                              <td><b>ENT_QUOTES</b></td>
2912
   *                              <td>Will convert both double and single quotes.</td>
2913
   *                              </tr>
2914
   *                              <tr valign="top">
2915
   *                              <td><b>ENT_NOQUOTES</b></td>
2916
   *                              <td>Will leave both double and single quotes unconverted.</td>
2917
   *                              </tr>
2918
   *                              <tr valign="top">
2919
   *                              <td><b>ENT_IGNORE</b></td>
2920
   *                              <td>
2921
   *                              Silently discard invalid code unit sequences instead of returning
2922
   *                              an empty string. Using this flag is discouraged as it
2923
   *                              may have security implications.
2924
   *                              </td>
2925
   *                              </tr>
2926
   *                              <tr valign="top">
2927
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2928
   *                              <td>
2929
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2930
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2931
   *                              </td>
2932
   *                              </tr>
2933
   *                              <tr valign="top">
2934
   *                              <td><b>ENT_DISALLOWED</b></td>
2935
   *                              <td>
2936
   *                              Replace invalid code points for the given document type with a
2937
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2938
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2939
   *                              instance, to ensure the well-formedness of XML documents with
2940
   *                              embedded external content.
2941
   *                              </td>
2942
   *                              </tr>
2943
   *                              <tr valign="top">
2944
   *                              <td><b>ENT_HTML401</b></td>
2945
   *                              <td>
2946
   *                              Handle code as HTML 4.01.
2947
   *                              </td>
2948
   *                              </tr>
2949
   *                              <tr valign="top">
2950 2
   *                              <td><b>ENT_XML1</b></td>
2951
   *                              <td>
2952 2
   *                              Handle code as XML 1.
2953
   *                              </td>
2954
   *                              </tr>
2955
   *                              <tr valign="top">
2956
   *                              <td><b>ENT_XHTML</b></td>
2957
   *                              <td>
2958
   *                              Handle code as XHTML.
2959
   *                              </td>
2960
   *                              </tr>
2961
   *                              <tr valign="top">
2962
   *                              <td><b>ENT_HTML5</b></td>
2963
   *                              <td>
2964
   *                              Handle code as HTML 5.
2965
   *                              </td>
2966
   *                              </tr>
2967
   *                              </table>
2968
   *                              </p>
2969
   * @param string $encoding      [optional] <p>
2970
   *                              Like <b>htmlspecialchars</b>,
2971
   *                              <b>htmlentities</b> takes an optional third argument
2972
   *                              <i>encoding</i> which defines encoding used in
2973
   *                              conversion.
2974
   *                              Although this argument is technically optional, you are highly
2975
   *                              encouraged to specify the correct value for your code.
2976
   *                              </p>
2977
   * @param bool   $double_encode [optional] <p>
2978
   *                              When <i>double_encode</i> is turned off PHP will not
2979
   *                              encode existing html entities. The default is to convert everything.
2980
   *                              </p>
2981
   *
2982
   *
2983
   * @return string the encoded string.
2984
   * </p>
2985
   * <p>
2986
   * If the input <i>string</i> contains an invalid code unit
2987
   * sequence within the given <i>encoding</i> an empty string
2988
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2989
   * <b>ENT_SUBSTITUTE</b> flags are set.
2990
   */
2991
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Straight delegation to the native function; the only difference is
    // that the encoding defaults to UTF-8 here.
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
2995
2996
  /**
2997
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
2998
   *
2999
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3000
   *
3001
   * @param string $str           <p>
3002
   *                              The string being converted.
3003
   *                              </p>
3004
   * @param int    $flags         [optional] <p>
3005
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3006
   *                              invalid code unit sequences and the used document type. The default is
3007
   *                              ENT_COMPAT | ENT_HTML401.
3008
   *                              <table>
3009
   *                              Available <i>flags</i> constants
3010
   *                              <tr valign="top">
3011
   *                              <td>Constant Name</td>
3012
   *                              <td>Description</td>
3013
   *                              </tr>
3014
   *                              <tr valign="top">
3015
   *                              <td><b>ENT_COMPAT</b></td>
3016
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3017
   *                              </tr>
3018
   *                              <tr valign="top">
3019
   *                              <td><b>ENT_QUOTES</b></td>
3020
   *                              <td>Will convert both double and single quotes.</td>
3021
   *                              </tr>
3022
   *                              <tr valign="top">
3023
   *                              <td><b>ENT_NOQUOTES</b></td>
3024
   *                              <td>Will leave both double and single quotes unconverted.</td>
3025
   *                              </tr>
3026
   *                              <tr valign="top">
3027
   *                              <td><b>ENT_IGNORE</b></td>
3028
   *                              <td>
3029
   *                              Silently discard invalid code unit sequences instead of returning
3030
   *                              an empty string. Using this flag is discouraged as it
3031
   *                              may have security implications.
3032
   *                              </td>
3033
   *                              </tr>
3034
   *                              <tr valign="top">
3035
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3036
   *                              <td>
3037
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3038
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
3039
   *                              </td>
3040
   *                              </tr>
3041
   *                              <tr valign="top">
3042
   *                              <td><b>ENT_DISALLOWED</b></td>
3043
   *                              <td>
3044
   *                              Replace invalid code points for the given document type with a
3045
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3046
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3047
   *                              instance, to ensure the well-formedness of XML documents with
3048
   *                              embedded external content.
3049
   *                              </td>
3050
   *                              </tr>
3051
   *                              <tr valign="top">
3052
   *                              <td><b>ENT_HTML401</b></td>
3053
   *                              <td>
3054
   *                              Handle code as HTML 4.01.
3055
   *                              </td>
3056
   *                              </tr>
3057
   *                              <tr valign="top">
3058
   *                              <td><b>ENT_XML1</b></td>
3059
   *                              <td>
3060
   *                              Handle code as XML 1.
3061
   *                              </td>
3062 1
   *                              </tr>
3063
   *                              <tr valign="top">
3064 1
   *                              <td><b>ENT_XHTML</b></td>
3065
   *                              <td>
3066
   *                              Handle code as XHTML.
3067
   *                              </td>
3068
   *                              </tr>
3069
   *                              <tr valign="top">
3070
   *                              <td><b>ENT_HTML5</b></td>
3071
   *                              <td>
3072 1
   *                              Handle code as HTML 5.
3073
   *                              </td>
3074 1
   *                              </tr>
3075
   *                              </table>
3076
   *                              </p>
3077
   * @param string $encoding      [optional] <p>
3078
   *                              Defines encoding used in conversion.
3079
   *                              </p>
3080
   *                              <p>
3081
   *                              For the purposes of this function, the encodings
3082
   *                              ISO-8859-1, ISO-8859-15,
3083
   *                              UTF-8, cp866,
3084
   *                              cp1251, cp1252, and
3085
   *                              KOI8-R are effectively equivalent, provided the
3086
   *                              <i>string</i> itself is valid for the encoding, as
3087
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3088
   *                              the same positions in all of these encodings.
3089
   *                              </p>
3090
   * @param bool   $double_encode [optional] <p>
3091
   *                              When <i>double_encode</i> is turned off PHP will not
3092
   *                              encode existing html entities, the default is to convert everything.
3093
   *                              </p>
3094
   *
3095
   * @return string The converted string.
3096
   * </p>
3097
   * <p>
3098
   * If the input <i>string</i> contains an invalid code unit
3099
   * sequence within the given <i>encoding</i> an empty string
3100
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3101
   * <b>ENT_SUBSTITUTE</b> flags are set.
3102
   */
3103 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Thin pass-through to PHP's native htmlspecialchars(); all four arguments
    // are forwarded unchanged, with $encoding defaulting to 'UTF-8' here.
    $converted = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
  }
3107
3108
  /**
3109
   * checks whether iconv is available on the server
3110
   *
3111
   * @return   bool True if available, False otherwise
3112
   */
3113
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean, so a strict comparison
    // replaces the redundant true/false branch.
    return extension_loaded('iconv') === true;
  }
3117 1
3118
  /**
3119
   * Converts Integer to hexadecimal U+xxxx code point representation.
3120
   *
3121
   * @param    int    $int The integer to be converted to hexadecimal code point.
3122
   * @param    string $pfix
3123
   *
3124
   * @return   string The code point, or empty string on failure.
3125
   */
3126
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain run of decimal digits (negative numbers,
    // floats, text, the empty string) is rejected with an empty result.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // dechex() produces lowercase digits; left-pad with zeros to at least four.
    $hexDigits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $hexDigits;
  }
3138
3139
  /**
3140
   * checks whether intl is available on the server
3141
   *
3142
   * @return   bool True if available, False otherwise
3143
   */
3144
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean, so a strict comparison
    // replaces the redundant true/false branch.
    return extension_loaded('intl') === true;
  }
3148
3149
  /**
3150
   * alias for "UTF8::is_ascii()"
3151
   *
3152
   * @param string $str
3153
   *
3154
   * @return boolean
3155
   */
3156
  public static function isAscii($str)
  {
    // Camel-case alias: the check itself is delegated to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
3160
3161
  /**
3162
   * alias for "UTF8::is_base64"
3163
   *
3164
   * @param string $str
3165
   *
3166
   * @return bool
3167
   */
3168
  public static function isBase64($str)
  {
    // Camel-case alias: the check itself is delegated to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
3172
3173
  /**
3174
   * alias for "UTF8::is_bom"
3175
   *
3176
   * @param string $utf8_chr
3177
   *
3178
   * @return boolean
3179 16
   */
3180
  public static function isBom($utf8_chr)
  {
    // Camel-case alias: the check itself is delegated to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
3184
3185
  /**
3186
   * Try to check if a string is a json-string...
3187
   *
3188
   * @param $str
3189
   *
3190
   * @return bool
3191
   *
3192 4
   * @deprecated
3193
   */
3194 4
  public static function isJson($str)
  {
    $str = (string)$str;

    // An empty string is never treated as JSON.
    if (!isset($str[0])) {
      return false;
    }

    // Only input that decodes to an object counts: JSON arrays and scalars
    // decode to non-objects here and are therefore reported as false.
    $decoded = json_decode($str);

    return is_object($decoded) && json_last_error() === JSON_ERROR_NONE;
  }
3212 1
3213 1
  /**
3214
   * alias for "UTF8::is_utf8"
3215 1
   *
3216
   * @param string $str
3217
   *
3218
   * @return bool
3219
   */
3220
  public static function isUtf8($str)
  {
    // Camel-case alias: the check itself is delegated to is_utf8().
    $result = self::is_utf8($str);

    return $result;
  }
3224
3225
  /**
3226 4
   * Checks if a string is 7 bit ASCII.
3227
   *
3228
   * @param    string $str The string to check.
3229 4
   *
3230
   * @return   bool <strong>true</strong> if it is ASCII<br />
3231
   *                <strong>false</strong> otherwise
3232 4
   */
3233
  public static function is_ascii($str)
  {
    // Look for any byte with the high bit set (0x80-0xFF); the string is
    // 7-bit ASCII exactly when no such byte is found.
    $highByteFound = preg_match('/[\x80-\xFF]/', $str);

    return !$highByteFound;
  }
3237 4
3238 3
  /**
3239
   * Returns true if the string is base64 encoded, false otherwise.
3240 4
   *
3241
   * @param string $str
3242
   *
3243
   * @return bool Whether or not $str is base64 encoded
3244
   */
3245
  public static function is_base64($str)
  {
    $str = (string)$str;

    // The empty string is not considered base64.
    if (!isset($str[0])) {
      return false;
    }

    // Round-trip test: strict-decode the input, re-encode it, and require the
    // result to be byte-identical to what was passed in.
    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
3259
3260
  /**
3261
   * Check if the input is binary... (this looks like a hack)
3262
   *
3263
   * @param string $input
3264
   *
3265
   * @return bool
3266
   */
3267
  public static function is_binary($input)
  {
    $byteCount = strlen($input);

    // A non-empty string made up only of the characters "0" and "1".
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte.
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    // NOTE(review): substr_count() looks for the literal four-character string
    // '^ -~' here, not for a character class. It reads like a regex class
    // ("non-printable ASCII") was intended -- confirm before changing, since
    // is_utf16() and is_utf32() only proceed when this method returns true.
    if ($byteCount && substr_count($input, '^ -~') / $byteCount > 0.3) {
      return true;
    }

    return false;
  }
3284
3285 2
  /**
3286
   * Check if the file is binary.
3287 2
   *
3288 2
   * @param string $file
3289
   *
3290 2
   * @return boolean
3291 2
   */
3292 2
  public static function is_binary_file($file)
  {
    // Sample that is handed to is_binary(); stays empty when the file cannot
    // be opened or read.
    $block = '';

    try {
      $fp = fopen($file, 'r');

      // fopen() signals failure by returning false (plus a warning) instead of
      // throwing, so the handle has to be checked before it is read or closed.
      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $chunk = fread($fp, 512);
        fclose($fp);

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Reached only when something (e.g. a custom error handler) turns the
      // I/O warnings into exceptions; treat the file as unreadable.
      $block = '';
    }

    return self::is_binary($block);
  }
3304
3305 2
  /**
3306 2
   * Checks if the given string is exactly "UTF8 - Byte Order Mark".
3307 2
   *
3308 2
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3309 2
   *
3310 2
   * @param    string $utf8_chr The input string.
3311 2
   *
3312 2
   * @return   bool True if the $utf8_chr is Byte Order Mark, False otherwise.
3313 2
   */
3314 1
  public static function is_bom($utf8_chr)
  {
    // Strict comparison against the value returned by bom(): the argument
    // must be exactly that string, not merely start with it.
    $bom = self::bom();

    return $utf8_chr === $bom;
  }
3318 2
3319
  /**
3320 2
   * Check if the string is UTF-16.
3321 1
   *
3322 1
   * @param string $str
3323
   *
3324 1
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3325
   */
3326 View Code Duplication
  public static function is_utf16($str)
  {
    // Input that is_binary() does not flag is never reported as UTF-16.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // One score per byte order, evaluated little-endian first, then big-endian.
    $hits = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if ($asUtf8 === false || strlen($asUtf8) <= 1) {
        continue;
      }

      // The candidate only scores if converting back and forth is stable.
      $reEncoded = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // Count every key of count_chars($roundTrip) that is strictly contained
      // in the values of count_chars($str).
      $inputChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $char => $unused) {
        if (in_array($char, $inputChars, true) === true) {
          $hits[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) means no decision.
    if ($hits['UTF-16BE'] === $hits['UTF-16LE']) {
      return false;
    }

    return ($hits['UTF-16LE'] > $hits['UTF-16BE']) ? 1 : 2;
  }
3373 1
3374
  /**
3375 2
   * Check if the string is UTF-32.
3376
   *
3377
   * @param string $str
3378
   *
3379
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3380
   */
3381 View Code Duplication
  public static function is_utf32($str)
  {
    // Input that is_binary() does not flag is never reported as UTF-32.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // One score per byte order, evaluated little-endian first, then big-endian.
    $hits = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if ($asUtf8 === false || strlen($asUtf8) <= 1) {
        continue;
      }

      // The candidate only scores if converting back and forth is stable.
      $reEncoded = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // Count every key of count_chars($roundTrip) that is strictly contained
      // in the values of count_chars($str).
      $inputChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $char => $unused) {
        if (in_array($char, $inputChars, true) === true) {
          $hits[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) means no decision.
    if ($hits['UTF-32BE'] === $hits['UTF-32LE']) {
      return false;
    }

    return ($hits['UTF-32LE'] > $hits['UTF-32BE']) ? 1 : 2;
  }
3428
3429 30
  /**
3430 32
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3431
   *
3432 28
   * @see    http://hsivonen.iki.fi/php-utf8/
3433 28
   *
3434 28
   * @param    string $str The string to be checked.
3435 28
   *
3436 30
   * @return   bool
3437
   */
3438 13
  public static function is_utf8($str)
  {
    $str = (string)$str;

    // The empty string is treated as valid UTF-8.
    if (!isset($str[0])) {
      return true;
    }

    // NOTE(review): this shortcut is taken when pcre_utf8_support() is NOT
    // strictly true, yet the pattern below relies on the /u modifier. The
    // condition looks inverted -- confirm against pcre_utf8_support() before
    // changing it.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    // Byte-wise state machine.
    $mState = 0; // number of continuation octets still expected for the current character
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets announced by the current lead octet
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Between characters: expect either a US-ASCII octet or the lead
        // octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither in the US-ASCII range nor a legal first octet of a
          // multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence: only a continuation octet (10xxxxxx)
      // is acceptable.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      // Merge the six payload bits into their position in the code point.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the multi-octet sequence: $mUcs4 now holds the complete code
        // point, so check for illegal sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state here means the input ended in the middle of a
    // multi-octet sequence (e.g. a lone lead octet), which is not valid UTF-8.
    return ($mState === 0);
  }
3562 2
3563 2
  /**
3564 2
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3565
   * Decodes a JSON string
3566
   *
3567
   * @link http://php.net/manual/en/function.json-decode.php
3568 2
   *
3569
   * @param string $json    <p>
3570
   *                        The <i>json</i> string being decoded.
3571
   *                        </p>
3572
   *                        <p>
3573
   *                        This function only works with UTF-8 encoded strings.
3574
   *                        </p>
3575
   *                        <p>PHP implements a superset of
3576
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3577
   *                        only supports these values when they are nested inside an array or an object.
3578
   *                        </p>
3579
   * @param bool   $assoc   [optional] <p>
3580
   *                        When <b>TRUE</b>, returned objects will be converted into
3581
   *                        associative arrays.
3582
   *                        </p>
3583
   * @param int    $depth   [optional] <p>
3584
   *                        User specified recursion depth.
3585
   *                        </p>
3586
   * @param int    $options [optional] <p>
3587
   *                        Bitmask of JSON decode options. Currently only
3588
   *                        <b>JSON_BIGINT_AS_STRING</b>
3589
   *                        is supported (default is to cast large integers as floats)
3590
   *                        </p>
3591
   *
3592
   * @return mixed the value encoded in <i>json</i> in appropriate
3593
   * PHP type. Values true, false and
3594
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3595
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3596
   * <i>json</i> cannot be decoded or if the encoded
3597
   * data is deeper than the recursion limit.
3598
   */
3599
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through filter() before it is decoded.
    $filtered = self::filter($json);

    // $options is only forwarded when Bootup::is_php('5.4') reports true;
    // otherwise the three-argument form of json_decode() is used.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3611 1
3612
  /**
3613
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3614 1
   * Returns the JSON representation of a value
3615
   *
3616
   * @link http://php.net/manual/en/function.json-encode.php
3617 1
   *
3618
   * @param mixed $value   <p>
3619
   *                       The <i>value</i> being encoded. Can be any type except
3620
   *                       a resource.
3621
   *                       </p>
3622
   *                       <p>
3623
   *                       All string data must be UTF-8 encoded.
3624
   *                       </p>
3625
   *                       <p>PHP implements a superset of
3626
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3627 6
   *                       only supports these values when they are nested inside an array or an object.
3628
   *                       </p>
3629 6
   * @param int   $options [optional] <p>
3630
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3631
   *                       <b>JSON_HEX_TAG</b>,
3632
   *                       <b>JSON_HEX_AMP</b>,
3633
   *                       <b>JSON_HEX_APOS</b>,
3634
   *                       <b>JSON_NUMERIC_CHECK</b>,
3635
   *                       <b>JSON_PRETTY_PRINT</b>,
3636
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3637
   *                       <b>JSON_FORCE_OBJECT</b>,
3638
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3639
   *                       constants is described on
3640
   *                       the JSON constants page.
3641
   *                       </p>
3642 24
   * @param int   $depth   [optional] <p>
3643
   *                       Set the maximum depth. Must be greater than zero.
3644 24
   *                       </p>
3645
   *
3646 24
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3647 2
   */
3648
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is passed through filter() before it is encoded.
    $filtered = self::filter($value);

    // $depth is only forwarded when Bootup::is_php('5.5') is truthy;
    // otherwise the two-argument form of json_encode() is used.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3660
3661
  /**
3662 1
   * Makes string's first char lowercase.
3663
   *
3664 1
   * @param    string $str The input string
3665
   *
3666
   * @return   string The resulting string
3667
   */
3668 1
  public static function lcfirst($str)
  {
    // Lower-case only the first character (via this class's substr() and
    // strtolower()) and re-attach the untouched remainder.
    $head = self::strtolower(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
3672
3673
  /**
3674
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3675
   *
3676
   * WARNING: This is much slower than "ltrim()" !!!!
3677
   *
3678
   * @param    string $str   The string to be trimmed
3679 1
   * @param    string $chars Optional characters to be stripped
3680
   *
3681 1
   * @return   string The string with unwanted characters stripped from the left
3682 1
   */
3683 1 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    // Nothing to trim on an empty string.
    if (!isset($str[0])) {
      return '';
    }

    // INF is the "no character list given" sentinel and means whitespace (\s);
    // any other value is turned into a regex fragment by rxClass().
    if (INF === $chars) {
      $chars = '\s';
    } else {
      $chars = self::rxClass($chars);
    }

    // Remove one or more matching characters from the start, in UTF-8 mode.
    return preg_replace("/^{$chars}+/u", '', $str);
  }
3695
3696 2
  /**
3697
   * Returns the UTF-8 character with the maximum code point in the given data.
3698 2
   *
3699 2
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3700 2
   *
3701
   * @return   string The character with the highest code point.
3702 2
   */
3703 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is joined into one string first.
    $haystack = is_array($arg) ? implode($arg) : $arg;

    // Take the largest value returned by codepoints() and turn it back into a
    // character with chr().
    $codepoints = self::codepoints($haystack);

    return self::chr(max($codepoints));
  }
3711
3712 1
  /**
3713
   * Calculates and returns the maximum number of bytes taken by any
3714 1
   * UTF-8 encoded character in the given string.
3715
   *
3716
   * @param    string $str The original Unicode string.
3717
   *
3718 1
   * @return   int The largest byte length of any character in the string, or 0 if there are none.
3719
   */
3720
  public static function max_chr_width($str)
  {
    $widths = self::chr_size_list($str);

    // No entries from chr_size_list() means there is nothing to measure.
    if (count($widths) < 1) {
      return 0;
    }

    return (int)max($widths);
  }
3729
3730 13
  /**
3731
   * checks whether mbstring is available on the server
3732 13
   *
3733
   * @return   bool True if available, False otherwise
3734
   */
3735 13
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    // Side effect: whenever mbstring is present, its internal encoding is
    // switched to UTF-8 as part of this check.
    \mb_internal_encoding('UTF-8');

    return true;
  }
3745 13
3746 13
  /**
3747 13
   * Returns the UTF-8 character with the minimum code point in the given data.
3748 13
   *
3749
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
3750 13
   *
3751 2
   * @return   string The character with the lowest code point.
3752
   */
3753 View Code Duplication
  public static function min($arg)
  {
    // An array of strings is joined into one string first.
    $haystack = is_array($arg) ? implode($arg) : $arg;

    // Take the smallest value returned by codepoints() and turn it back into
    // a character with chr().
    $codepoints = self::codepoints($haystack);

    return self::chr(min($codepoints));
  }
3761
3762
  /**
3763
   * Normalize the encoding-name input.
3764 2
   *
3765
   * @param string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
3766 2
   *
3767 2
   * @return string e.g.: ISO-8859-1, UTF-8, ISO-8859-5 etc.
3768
   */
3769 2
  public static function normalizeEncoding($encoding)
  {
    // Falsy input (null, '', '0', false, ...) is handed back untouched.
    if (!$encoding) {
      return $encoding;
    }

    $encoding = (string)$encoding;
    if (!isset($encoding[0])) {
      return '';
    }

    $upper = strtoupper($encoding);

    // Lookup key: the upper-cased name with everything except letters, digits
    // and whitespace removed (so "UTF-8" and "utf_8" both become "UTF8").
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    // Known aliases and the canonical name each one maps to.
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'WINDOWS1251' => 'ISO-8859-5',
    );

    // Unknown names fall through as the upper-cased input (punctuation kept).
    return !empty($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upper;
  }
3806 8
3807 1
  /**
3808 1
   * Normalize MS Word special characters.
3809
   *
3810 8
   * @param string $str The string to be normalized.
3811 8
   *
3812
   * @return string
3813 8
   */
3814
  public static function normalize_msword($str)
  {
    // Search and replacement lists are derived from self::$utf8MSWord once
    // and then reused for every later call.
    static $search = null;
    static $replacements = null;

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replacements = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replacements, $str);
  }
3826 1
3827
  /**
   * Normalize the whitespace.
   *
   * Every character listed in self::$whitespaceTable is replaced by a plain space (" "). Unless
   * $keepBidiUnicodeControls is true, the characters of self::$bidiUniCodeControlsTable are removed first.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces
   *                                        (only a strict boolean true is honoured).
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars
   *                                        (anything but a strict boolean false keeps them).
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // The cache slot must be derived from the very same strict test that decides whether the
    // NO-BREAK SPACE entry is dropped. Otherwise a truthy non-boolean (e.g. 1) would store the
    // full table in slot 1 and later calls with true would wrongly replace non-breaking spaces.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($whitespaces[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3865 2
3866
  /**
   * Format a number with grouped thousands.
   *
   * Works like the native number_format(), but guarantees that separators longer than one byte
   * (e.g. multi-byte UTF-8 characters) are used completely on every PHP version: the number is
   * formatted with the ASCII placeholders "." and "," which are then swapped for the real separators.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // One long separator is enough to need the workaround (the former check required both).
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() substitutes both placeholders in a single pass, so a separator that itself
      // contains "." or "," is never replaced a second time.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3901
3902 1
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * Only the first (up to four) bytes are inspected; the length of the sequence is derived from the
   * lead byte. Continuation bytes are not validated.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for an empty string.
   */
  public static function ord($s)
  {
    // Test for "empty" by length, not by truthiness: the string "0" is a valid character (U+0030).
    $s = (string)$s;
    if (!isset($s[0])) {
      return 0;
    }

    // 1-based list of the byte values of the first four bytes
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes[1];

    // four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if (0xF0 <= $lead && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
    if (0xE0 <= $lead && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two-byte sequence: 110xxxxx 10xxxxxx
    if (0xC0 <= $lead && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte (or truncated sequence): the byte value itself
    return $lead;
  }
3933
3934 1
  /**
   * Parses the string into variables.
   *
   * WARNING: This differs from parse_str() by returning the results
   *    instead of placing them in the local scope!
   *
   * The input is passed through self::filter() before it is handed to mb_parse_str().
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string.
   *                        </p>
   * @param array  $result  <p>
   *                        The parsed variables are stored in this variable as array elements.
   *                        </p>
   *
   * @return bool The success flag reported by mb_parse_str() (it used to be discarded).
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    return \mb_parse_str(self::filter($str), $result);
  }
3961 1
3962 1
  /**
   * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * An empty pattern carrying the modifier is run against an empty subject; warnings are suppressed,
   * so a PCRE build without UTF-8 support simply yields false.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probeResult = @preg_match('//u', '');

    return (bool)$probeResult;
  }
3972
3973
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: a string of decimal digits is a decimal code point,
   * a string of hexadecimal digits is a hexadecimal code point (so "a" means U+000A, not the letter),
   * anything else is treated as a UTF-8 character.
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters from start to end; an empty array if a boundary is falsy
   *                 or resolves to code point 0.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $start = self::rangeBoundaryToCodePoint($var1);
    if (!$start) {
      return array();
    }

    $end = self::rangeBoundaryToCodePoint($var2);
    if (!$end) {
      return array();
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Resolve one boundary argument of range() to a code point.
   *
   * @param mixed $boundary Decimal digits, hexadecimal digits or a UTF-8 character.
   *
   * @return int
   */
  protected static function rangeBoundaryToCodePoint($boundary)
  {
    // the ctype functions are only well-defined for strings
    $asString = (string)$boundary;

    if (ctype_digit($asString)) {
      return (int)$boundary;
    }

    if (ctype_xdigit($asString)) {
      return (int)self::hex_to_int($boundary);
    }

    return self::ord($boundary);
  }
4019
4020
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Besides the raw byte-order marks, the variants that appear when a BOM was read as "WINDOWS-1252"
   * and re-encoded as UTF-8 are removed as well. At most one mark is stripped from the
   * start of the string.
   *
   * @param string $str
   *
   * @return string
   */
  public static function removeBOM($str = '')
  {
    // INFO: https://en.wikipedia.org/wiki/Byte_order_mark
    //
    // All marks are spelled as byte escapes, so they do not depend on the encoding of this source file.
    // Order matters: the UTF-32 marks must be tested before the UTF-16 marks they start with / contain.
    static $byteOrderMarks = array(
        "\xef\xbb\xbf",             // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf", // UTF-8 BOM as "WINDOWS-1252"
        "\x00\x00\xfe\xff",         // UTF-32 (BE) BOM
        "\xff\xfe\x00\x00",         // UTF-32 (LE) BOM
        "\xfe\xff",                 // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf",         // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe",                 // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe",         // UTF-16 (LE) BOM as "WINDOWS-1252"
    );

    $str = (string)$str;

    foreach ($byteOrderMarks as $bom) {
      $bomLength = strlen($bom);

      if (strncmp($str, $bom, $bomLength) === 0) {
        // substr() returns false before PHP 8 when nothing is left after the mark
        return (string)substr($str, $bomLength);
      }
    }

    return $str;
  }
4051
4052 36
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of a search string is collapsed into a single occurrence.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        $item = (string)$item;

        // an empty needle has nothing to collapse
        if ($item === '') {
          continue;
        }

        // Replace with the captured group instead of the raw needle: used as replacement text,
        // a needle such as "$0" or "\0" would be expanded as a back-reference.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4074
4075
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * Based on https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the URL-encoded ("%00", "%1F", ...) forms of the characters.
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Percent-encoding is case-insensitive ("%0B" equals "%0b"), hence the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until a pass changes nothing: removing one sequence can join the
    // surrounding characters into a new removable sequence.
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4108 23
4109
  /**
   * Replace the diamond question mark (U+FFFD REPLACEMENT CHARACTER).
   *
   * @param string $str
   * @param string $unknown The text that takes the place of every replacement character.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // The character is spelled as a byte escape only; a second, literal copy of the same
    // character would be redundant and would depend on the encoding of this source file.
    return str_replace("\xEF\xBF\xBD", $unknown, $str);
  }
4131 40
4132
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * WARNING: This is much slower then "rtrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped
   *
   * @return   string The string with unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INF is the "no character list given" marker -> strip whitespace
    $chars = INF === $chars ? '\s' : self::rxClass($chars);

    // "D" (dollar-end-only): without it "$" also matches in front of a final newline, so
    // e.g. the "c" of "abc\n" would be stripped although it is not at the end of the string.
    return preg_replace("/{$chars}+$/uD", '', $str);
  }
4154 16
4155 16
  /**
   * Build a regular-expression fragment that matches any one of the characters of a string.
   *
   * Characters end up in a bracket expression; results of self::str_split() that are longer than one
   * character are appended as alternatives in a non-capturing group. Results are cached per input.
   *
   * @param string $s     The characters to match.
   * @param string $class Text that is put at the start of the bracket expression.
   *
   * @return string
   */
  protected static function rxClass($s, $class = '')
  {
    static $patternCache = array();

    $cacheKey = $s . $class;

    if (isset($patternCache[$cacheKey])) {
      return $patternCache[$cacheKey];
    }

    $bracketContent = $class;
    $alternatives = array();

    foreach (self::str_split($s) as $piece) {
      if ('-' === $piece) {
        // a hyphen goes to the front, where it cannot form a range
        $bracketContent = '-' . $bracketContent;
      } elseif (!isset($piece[2])) {
        // at most two bytes: quote it, it may be a regex meta character
        $bracketContent .= preg_quote($piece, '/');
      } elseif (1 === self::strlen($piece)) {
        // one character of three or more bytes: taken as it is
        $bracketContent .= $piece;
      } else {
        // several characters: cannot live inside a bracket expression
        $alternatives[] = $piece;
      }
    }

    $pattern = '[' . $bracketContent . ']';

    if ($alternatives !== array()) {
      $pattern = '(?:' . $pattern . '|' . implode('|', $alternatives) . ')';
    }

    $patternCache[$cacheKey] = $pattern;

    return $pattern;
  }
4200
4201 25
  /**
   * Echo native UTF8-Support libs, e.g. for debugging.
   *
   * Prints every entry of self::$support, each followed by a newline and a "<br>" tag.
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportEntry) {
      echo $supportEntry, "\n<br>";
    }
  }
4210 24
4211
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $chr The Unicode character to be encoded as numbered entity.
   *
   * @return   string The HTML numbered entity, or an empty string for empty input.
   */
  public static function single_chr_html_encode($chr)
  {
    // Test for "empty" by length, not by truthiness: the string "0" must become "&#48;".
    $chr = (string)$chr;
    if (!isset($chr[0])) {
      return '';
    }

    // A single byte is its own code point, so the native function is sufficient here.
    if (!isset($chr[1])) {
      return '&#' . \ord($chr) . ';';
    }

    return '&#' . self::ord($chr) . ';';
  }
4226
4227
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the characters are collected by a regular expression; otherwise the bytes
   * are walked manually and bytes that do not form a well-formed sequence are left out.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied on the PCRE path).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // init
    self::checkForSupport();
    $ret = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: decode the sequence length from each lead byte

      $byteCount = strlen($str);
      $pos = 0;

      while ($pos < $byteCount) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          $width = 1; // 0xxxxxxx
        } elseif (($lead & "\xE0") === "\xC0") {
          $width = 2; // 110xxxxx
        } elseif (($lead & "\xF0") === "\xE0") {
          $width = 3; // 1110xxxx
        } elseif (($lead & "\xF8") === "\xF0") {
          $width = 4; // 11110xxx
        } else {
          $width = 0; // not a lead byte
        }

        // the sequence must be complete and every following byte must look like 10xxxxxx
        $wellFormed = $width > 0 && isset($str[$pos + $width - 1]);
        for ($k = 1; $wellFormed && $k < $width; $k++) {
          $wellFormed = (($str[$pos + $k] & "\xC0") === "\x80");
        }

        if ($wellFormed) {
          $ret[] = substr($str, $pos, $width);
          $pos += $width;
        } else {
          // skip the offending byte only
          $pos++;
        }
      }
    }

    if ($length > 1) {
      $ret = array_map('implode', array_chunk($ret, $length));
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4305
4306
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (for binary strings only), ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally a round-trip through "iconv()" for every entry of
   * self::$iconvEncoding.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each detector is run once and its result reused (it used to be called up to twice)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
        'UTF-8',
    );

    self::checkForSupport();

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the string from that encoding into itself leaves
    // it untouched. The strings are compared directly; hashing both sides adds nothing.

    $original = (string)$str;
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@iconv($encodingTmp, $encodingTmp, $str) === $original) {
        return $encodingTmp;
      }
    }

    return false;
  }
4383 1
4384
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Implemented with case-insensitive Unicode regular expressions; search and replacement
   * strings are nevertheless treated as plain text.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed a string or an array of replacements.
   * @since 5.0
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();

    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // a pattern that can never match: an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() expands "$n" / "\n" inside the replacement text as back-references;
    // escape "\" and "$" so the replacement is inserted literally, as str_ireplace() would do.
    if (is_array($replace)) {
      foreach ($replace as $key => $value) {
        $replace[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replace = addcslashes((string)$replace, '\\$');
    }

    return preg_replace($patterns, $replace, $subject, -1, $count);
  }
4428 2
4429
  /**
   * Limit the number of characters in a string without cutting a word in half.
   *
   * Strings of at most $length characters are returned unchanged. Longer strings are cut to $length
   * characters, the last (possibly incomplete) word is dropped and $strAddOn is appended.
   *
   * @param  string $str
   * @param  int    $length
   * @param  string $strAddOn
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    $lastIndex = $length - 1;

    // the cut lands right on a space: drop that space, nothing else to repair
    if (self::substr($str, $lastIndex, 1) === ' ') {
      return self::substr($str, 0, $lastIndex) . $strAddOn;
    }

    $truncated = self::substr($str, 0, $length);

    // drop the last word, it may have been cut in half
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord === '') {
      // nothing is left in front of the last word: fall back to a hard cut
      return self::substr($truncated, 0, $lastIndex) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4469
4470
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * Lengths are counted in characters. With STR_PAD_BOTH an odd amount of padding puts the extra
   * character on the right.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string (must be an integer, otherwise nothing is padded)
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string; the input is returned unchanged if there is
   *                  nothing to pad or the pad string is empty.
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $input_length) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad string cannot fill anything (and would cause a division by zero below)
    if ($ps_length < 1) {
      return $input;
    }

    $diff = $pad_length - $input_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $leftLength = $diff;
        $rightLength = 0;
        break;

      case STR_PAD_BOTH:
        // integer halves: floor on the left, ceil on the right
        $leftLength = (int)floor($diff / 2);
        $rightLength = (int)ceil($diff / 2);
        break;

      case STR_PAD_RIGHT:
      default:
        $leftLength = 0;
        $rightLength = $diff;
    }

    $pre = '';
    if ($leftLength > 0) {
      // repeat the pad string often enough, then cut to the exact number of characters
      $pre = self::substr(str_repeat($pad_string, (int)ceil($leftLength / $ps_length)), 0, $leftLength);
    }

    $post = '';
    if ($rightLength > 0) {
      $post = self::substr(str_repeat($pad_string, (int)ceil($rightLength / $ps_length)), 0, $rightLength);
    }

    return $pre . $input . $post;
  }
4515
4516
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $input      <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return str_repeat(self::filter($input), $multiplier);
  }
4540
4541
  /**
   * INFO: this is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4575
4576
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split with self::split(), so characters (not bytes) are shuffled.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $characters = self::split($str);

    shuffle($characters);

    return implode($characters);
  }
4591 17
4592
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice leaves one entry per distinct code point
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4617
4618 1
  /**
   * Splits a string into an array of chunks of $len grapheme clusters each.
   *
   * With the "intl" support flag set, the clusters are extracted with
   * grapheme_extract(); otherwise the Grapheme polyfill's cluster regex is used.
   * For $len < 1 the call is handed to PHP's native str_split() unchanged, so
   * the native error behaviour for an invalid length applies.
   *
   * @param string $str
   * @param int    $len Number of grapheme clusters per chunk.
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    if ($len < 1) {
      return str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $bytePos = 0;
      $byteLength = strlen($str);
      while ($bytePos < $byteLength) {
        // the 5th argument is by-reference: grapheme_extract() writes the next start offset into it
        $graphemes[] = \grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $bytePos, $bytePos);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $graphemes = $matches[0];
    }

    if ($len === 1) {
      return $graphemes;
    }

    // glue every run of $len consecutive clusters into one output element
    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        // first cluster of a new chunk
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
4666 7
4667 7
  /**
   * Builds a binary ("0"/"1") representation of the input.
   *
   * The input is walked byte by byte; each byte is passed to self::ord() and
   * the result is formatted with "%08b". The pieces are concatenated.
   *
   * @param   string $str The input character.
   *
   * @return  string Empty string for empty input.
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $bits = '';

    // str_split() without a length yields one element per byte
    foreach (str_split($str) as $byte) {
      $bits .= vsprintf('%08b', (array)self::ord($byte));
    }

    return $bits;
  }
4693 1
4694 1
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   *
   * The input is cleaned with self::clean(), split into single characters and
   * every non-ASCII character is replaced by the entry found in a lookup "bank"
   * (one PHP file per 256 code points below __DIR__ . '/data/'). Banks are
   * loaded on first use and kept in a static variable for the rest of the
   * request. The result is built from a per-character array, so memory use
   * grows with the length of the input.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @author <[email protected]>
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    // cache of already loaded banks: bank number => (low byte => replacement)
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Reset for every character: otherwise a character that cannot be
      // decoded below would silently reuse the code point of the previous one.
      $ord = null;

      // Decode by lead byte; the isset() checks make sure all continuation
      // bytes the formula reads are really there.
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        if (isset($c[1])) {
          $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
        }
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        if (isset($c[2])) {
          $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
        }
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        if (isset($c[3])) {
          $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
        }
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        if (isset($c[4])) {
          $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
        }
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        if (isset($c[5])) {
          $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
        }
      }

      // lead bytes 128-191 and 254-255, or truncated sequences, end up here
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // upper bits select the bank file, the low byte selects the entry in it
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): presumably the bank file assigns $UTF8_TO_ASCII[$bank]
          // in this scope - confirm against the files in /data.
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        // no bank file, or the file did not define the bank: treat as empty
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
4804
4805 8
  /**
   * Counts or lists the words in a UTF-8 string.
   *
   * The string is split with a regex built from self::rxClass($charlist, '\pL');
   * because the delimiter is captured, the words sit on the odd indexes of
   * the split result and the text between them on the even indexes.
   *
   * @param string $str The input string.
   * @param int $format <strong>0</strong> => return a number of words<br />
   *                    <strong>1</strong> => return an array of words
   *                    <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist
   *
   * @return array|float The number of words in the string, or the list of words
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $pattern = "/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u";
    $parts = \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $partCount = count($parts);

    if ($format === 1) {
      $words = array();
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[] = $parts[$index];
      }

      return $words;
    }

    if ($format === 2) {
      self::checkForSupport();

      $words = array();
      // character offset of the first word = length of the leading non-word text
      $offset = self::strlen($parts[0]);
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[$offset] = $parts[$index];
        // advance past this word and the separator text that follows it
        $offset += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
      }

      return $words;
    }

    // words are every second element, surrounded by non-word parts
    return ($partCount - 1) / 2;
  }
4849
4850 2
  /**
   * Case-insensitive string comparison.
   *
   * Both strings are passed through self::strtocasefold() and the results are
   * compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4862
4863
  /**
   * String comparison.
   *
   * Strings that are identical after string conversion compare as equal
   * right away; otherwise both are normalized to NFD and compared with PHP's
   * strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    // shortcut: identical strings need no normalization
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4880
4881
  /**
   * Find length of initial segment not matching mask.
   *
   * Returns the number of characters (counted with self::strlen()) that
   * precede the first character matched by self::rxClass($charList). If no such
   * character exists, the length of the whole (sub)string is returned.
   *
   * @param string $str
   * @param string $charList
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null null if $charList is empty
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only cut the string when the caller deviated from the defaults
    $needsSubstring = $offset || 2147483647 !== $length;
    if ($needsSubstring) {
      $str = (string)self::substr($str, $offset, $length);
    } else {
      $str = (string)$str;
    }

    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (!preg_match($pattern, $str, $match)) {
      return self::strlen($str);
    }

    // group 1 is the lazily matched text in front of the first mask character
    return self::strlen($match[1]);
  }
4910
4911
  /**
   * Makes a UTF-8 string from code points.
   *
   * Every element of the array is mapped through the class' chr() method and
   * the results are concatenated in array order.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $toCharacter = array('\\voku\\helper\\UTF8', 'chr');
    $characters = array_map($toCharacter, $array);

    return implode('', $characters);
  }
4930 8
4931 2
  /**
   * Checks if string starts with "UTF-8 BOM" character.
   *
   * The first three bytes of the string are handed to self::is_bom().
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $leadingBytes = substr($str, 0, 3);

    return self::is_bom($leadingBytes);
  }
4942
4943 7
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The string is first run through self::clean() and then passed to PHP's
   * strip_tags().
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               You can use the optional second parameter to specify tags which should
   *                               not be stripped.
   *                               </p>
   *                               <p>
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 before the tags are removed
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
4969
4970
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack
   *                           to start searching; null is treated as 0
   *                           </p>
   * @param string  $encoding  Encoding passed to \mb_stripos(); a boolean is replaced by 'UTF-8'
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false Return the numeric position of the first occurrence of
   * needle in the haystack
   * string, or false if needle is not found or one of the strings is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // The default is null, but \mb_stripos() expects an int offset; passing
    // null is deprecated since PHP 8.1. Cast like strpos() / strrpos() do.
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5017 1
5018
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case-insensitive, delegated to \mb_stristr() with the 'UTF-8' encoding).
   *
   * @param string $str
   * @param string $needle
   * @param bool   $before_needle Passed through to \mb_stristr().
   *
   * @return false|string false if $needle is empty, otherwise the result of \mb_stristr()
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = (string)$needle;

    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    $result = \mb_stristr($str, $needle, $before_needle, 'UTF-8');

    return $result;
  }
5038
5039
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Set the charset for e.g. "\mb_" function; a boolean is replaced by 'UTF-8'
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied for 'UTF-8')
   *
   * @return int the number of characters in
   *           string str having character encoding
   *           encoding. A multi-byte character is
   *           counted as 1.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // nothing to measure
    if ($str === '') {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    $shouldClean = $encoding === 'UTF-8' && $cleanUtf8 === true;
    if ($shouldClean) {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5075
5076
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are passed through self::strtocasefold() and then compared
   * with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>< 0</strong> if str1 is less than str2<br />
   *             <strong>> 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5090
5091
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Strings that are identical after string conversion compare as equal
   * right away; otherwise both are passed through self::strtonatfold() and
   * compared with PHP's strnatcmp().
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   *
   * @return int Similar to other string comparison functions, this one returns &lt; 0 if
   * str1 is less than str2; &gt;
   * 0 if str1 is greater than
   * str2, and 0 if they are equal.
   * @since 4.0
   * @since 5.0
   */
  public static function strnatcmp($str1, $str2)
  {
    // shortcut: identical strings need no folding
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $natFolded1 = self::strtonatfold($str1);
    $natFolded2 = self::strtonatfold($str2);

    return strnatcmp($natFolded1, $natFolded2);
  }
5114 10
5115
  /**
   * Case-insensitive string comparison of the first n characters
   *
   * Both strings are passed through self::strtocasefold() and then compared
   * with self::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     The length of strings to be used in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i> is
   * greater than <i>str2</i>, and 0 if they are equal.
   * @since 4.0.4
   * @since 5.0
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5140
5141
  /**
   * String comparison of the first n characters
   *
   * Both strings are cut to their first $len characters with self::substr()
   * and the two prefixes are compared with self::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     Number of characters to use in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i>
   * is greater than <i>str2</i>, and 0 if they are
   * equal.
   * @since 4.0
   * @since 5.0
   */
  public static function strncmp($str1, $str2, $len)
  {
    $prefix1 = self::substr($str1, 0, $len);
    $prefix2 = self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
5167
5168
  /**
   * Search a string for any of a set of characters
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>
   *                          The string where char_list is looked for.
   *                          </p>
   * @param string $char_list <p>
   *                          This parameter is case sensitive.
   *                          </p>
   *
   * @return string|false a string starting from the character found, or false if it is
   * not found or if one of the arguments is empty.
   * @since 5.0
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // byte offset of the matched character; the rest of the haystack starts there
    $bytePos = strpos($haystack, $m[0]);

    return substr($haystack, $bytePos);
  }
5199
5200 4
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack.
   *                              A non-negative integer is treated as a code point
   *                              and converted with self::chr().
   *                              </p>
   * @param int        $offset    [optional] <p>
   *                              The search offset. If it is not specified, 0 is used.
   *                              </p>
   * @param string     $encoding  Encoding passed to \mb_strpos(); a boolean is replaced by 'UTF-8'
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or one of the strings is empty) it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support integer $needle.
    // This check has to run BEFORE the string cast: afterwards the strict
    // comparison can never be true and the code point would be searched as
    // its decimal text. Same order as in strrpos().
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): the flag checked is 'iconv' while the call is grapheme_strpos() - confirm this is intended.
    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with older PHP releases (5.5.x before 5.5.35, 5.6.x before 5.6.21, 7.0.x before 7.0.6)
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback: byte-wise search, then convert the byte position into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5276 10
5277
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around \mb_strrchr().
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence
   *                         of needle
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         Determines which portion of haystack
   *                         this function returns.
   *                         If set to true, it returns all of haystack
   *                         from the beginning to the last occurrence of needle.
   *                         If set to false, it returns all of haystack
   *                         from the last occurrence of needle to the end,
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use; defaults to 'UTF-8'.
   *                         </p>
   *
   * @return string|false the portion of haystack.
   * or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $portion = \mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5311
5312
  /**
   * Reverses characters order in the string.
   *
   * The string is broken into pieces with self::split(), the pieces are
   * reversed and concatenated again.
   *
   * @param    string $str The input string
   *
   * @return   string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $characters = self::split($str);
    $reversed = array_reverse($characters);

    return implode('', $reversed);
  }
5324
  /**
5325
   * Finds the last occurrence of a character in a string within another, case insensitive.
5326
   *
5327
   * @link http://php.net/manual/en/function.mb-strrichr.php
5328
   *
5329
   * @param string $haystack <p>
5330
   *                         The string from which to get the last occurrence
5331
   *                         of needle
5332
   *                         </p>
5333
   * @param string $needle   <p>
5334
   *                         The string to find in haystack
5335
   *                         </p>
5336
   * @param bool   $part     [optional] <p>
5337
   *                         Determines which portion of haystack
5338
   *                         this function returns.
5339 6
   *                         If set to true, it returns all of haystack
5340
   *                         from the beginning to the last occurrence of needle.
5341 6
   *                         If set to false, it returns all of haystack
5342
   *                         from the last occurrence of needle to the end,
5343
   *                         </p>
5344
   * @param string $encoding [optional] <p>
5345 6
   *                         Character encoding name to use.
5346
   *                         If it is omitted, internal character encoding is used.
5347
   *                         </p>
5348
   *
5349
   * @return string the portion of haystack.
5350
   * or false if needle is not found.
5351
   */
5352
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
5353
  {
5354
    self::checkForSupport();
5355
5356
    return \mb_strrichr($haystack, $needle, $part, $encoding);
5357
  }
5358
5359
  /**
5360
   * Find position of last occurrence of a case-insensitive string.
5361
   *
5362
   * @param    string $haystack The string to look in
5363
   * @param    string $needle   The string to look for
5364
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
5365
   *
5366 1
   * @return   int The position of offset
5367
   */
5368 1
  public static function strripos($haystack, $needle, $offset = 0)
5369
  {
5370 1
    return self::strrpos(self::strtolower($haystack), self::strtolower($needle), $offset);
5371
  }
5372
5373
  /**
5374
   * Find position of last occurrence of a string in a string.
5375
   *
5376
   * @link http://php.net/manual/en/function.mb-strrpos.php
5377
   *
5378
   * @param string     $haystack  <p>
5379
   *                              The string being checked, for the last occurrence
5380
   *                              of needle
5381
   *                              </p>
5382
   * @param string|int $needle    <p>
5383 10
   *                              The string to find in haystack.
5384
   *                              Or a code point as int.
5385 10
   *                              </p>
5386 10
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters into
5387 10
   *                              the string. Negative values will stop searching at an arbitrary point
5388
   *                              prior to the end of the string.
5389 10
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
5390 1
   *
5391 1
   * @return int the numeric position of
5392 1
   * the last occurrence of needle in the
5393
   * haystack string. If
5394 10
   * needle is not found, it returns false.
5395
   */
5396 10
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
5397
  {
5398 10
    $haystack = (string)$haystack;
5399 1
5400 1
    if (((int)$needle) === $needle && ($needle >= 0)) {
5401
      $needle = self::chr($needle);
5402
    }
5403 10
5404 10
    $needle = (string)$needle;
5405
5406 10
    if (!isset($haystack[0], $needle[0])) {
5407
      return false;
5408 10
    }
5409
5410
    // init
5411
    self::checkForSupport();
5412
5413
    $needle = (string)$needle;
5414
    $offset = (int)$offset;
5415
5416
    if ($cleanUtf8 === true) {
5417
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
5418
5419
      $needle = self::clean($needle);
5420
      $haystack = self::clean($haystack);
5421
    }
5422
5423
    if (self::$support['mbstring'] === true) {
5424 20
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
5425
    }
5426 20
5427
    if (self::$support['iconv'] === true) {
5428 20
      return \grapheme_strrpos($haystack, $needle, $offset);
5429 5
    }
5430
5431
    // fallback
5432
5433 18
    if ($offset > 0) {
5434
      $haystack = self::substr($haystack, $offset);
5435 18
    } elseif ($offset < 0) {
5436
      $haystack = self::substr($haystack, 0, $offset);
5437
    }
5438
5439 View Code Duplication
    if (($pos = strrpos($haystack, $needle)) !== false) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
5440
      $left = substr($haystack, 0, $pos);
5441
5442
      // negative offset not supported in PHP strpos(), ignoring
5443
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
5444
    }
5445 3
5446
    return false;
5447 3
  }
5448
5449
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    The string to examine.
   * @param string $mask   The mask; it is passed through self::rxClass() to build the regex that is matched.
   * @param int    $offset Start position; when non-zero, the string is cut with self::substr() first.
   * @param int    $length Maximum length to examine; the default 2147483647 means "do not cut".
   *
   * @return int|null Result of self::strlen() for the matching leading segment, or 0 if nothing matches.
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $needsCut = $offset || 2147483647 !== $length;
    if ($needsCut) {
      $str = self::substr($str, $offset, $length);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (preg_match($pattern, $str, $matches)) {
      return self::strlen($matches[0]);
    }

    return 0;
  }
5468
5469
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Thin wrapper around grapheme_strstr(); all three arguments are passed on unchanged.
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      <p>
   *                              The input string. Must be valid UTF-8.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to look for. Must be valid UTF-8.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack before the first
   *                              occurrence of the needle (excluding the needle) is returned.
   *                              </p>
   *
   * @return string|false the portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5493
5494
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is run through the "common" case-folding table, optionally through the
   * "full" table, then cleaned with self::clean() and finally lower-cased.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str  The input string.
   * @param bool   $full Whether the "caseFolding_full" data set is applied as well.
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup lists are built once and then reused by later calls
    static $fullFoldTable = null;
    static $commonSearch = null;
    static $commonReplace = null;

    if ($commonSearch === null) {
      $commonSearch = array_keys(self::$commonCaseFold);
      $commonReplace = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonSearch, $commonReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search list, index 1 the replacement list
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    return self::strtolower(self::clean($folded));
  }
5531
5532
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      <p>
   *                         The string being lowercased.
   *                         </p>
   * @param string $encoding The encoding handed to mb_strtolower().
   *
   * @return string str with all alphabetic characters converted to lowercase; '' for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // nothing to convert
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $lowered = \mb_strtolower($str, $encoding);

    return $lowered;
  }
5558 39
5559
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed.
   *
   * @param string $s The input string.
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5570
5571
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when mbstring support is flagged; otherwise the string is cleaned
   * and converted with the lookup table returned by self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding The encoding handed to mb_strtoupper().
   *
   * @return string str with all alphabetic characters converted to uppercase; '' for empty input.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // fallback lookup lists, built on first use only
    static $tableSearch = null;
    static $tableReplace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, $encoding);
    }

    // fallback
    if ($tableSearch === null) {
      $table = self::case_table();
      $tableSearch = array_keys($table);
      $tableReplace = array_values($table);
    }

    return str_replace($tableSearch, $tableReplace, self::clean($str));
  }
5614
5615
  /**
   * Translate characters or replace sub-strings.
   *
   * Two call forms are supported:
   * - strtr($str, $pairs): $pairs is an array of "search => replacement" and is handed to PHP's strtr() as is.
   * - strtr($str, $from, $to): the n-th entry of $from is replaced by the n-th entry of $to. Strings are split
   *   into characters with self::str_split(); arrays are used as ready-made lists. Surplus entries of the
   *   longer list are ignored.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  <p>
   *                           The string being translated.
   *                           </p>
   * @param string|array $from <p>
   *                           The characters (or list of strings) to replace, or the
   *                           replacement pairs when $to is omitted.
   *                           </p>
   * @param string|array $to   <p>
   *                           The characters (or list of strings) to replace with.
   *                           </p>
   *
   * @return string This function returns a copy of str,
   * translating all occurrences of each entry in
   * from to the corresponding entry in
   * to.
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // self::str_split() is documented for strings only, so arrays are taken over unchanged
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $pairCount = min(count($from), count($to));

      // nothing to translate; also avoids array_combine() on empty arrays (fails on PHP < 5.4)
      if ($pairCount === 0) {
        return (string)$str;
      }

      // cut both lists to the same size before pairing them up
      $from = array_combine(
          array_slice($from, 0, $pairCount),
          array_slice($to, 0, $pairCount)
      );
    }

    return strtr($str, $from);
  }
5656
5657
  /**
   * Return the width of a string.
   *
   * Thin wrapper around mb_strwidth() with the encoding fixed to UTF-8.
   *
   * @param string $s The input string.
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5671
5672 1
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>
   *                           The string being checked.
   *                           </p>
   * @param int     $start     <p>
   *                           The first position used in str.
   *                           </p>
   * @param int     $length    [optional] <p>
   *                           The maximum length of the returned string;
   *                           null means the result of self::strlen($str).
   *                           </p>
   * @param string  $encoding  The encoding handed to mb_substr(); a boolean is replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string the portion of str specified by the start and length parameters; '' for empty input.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // without an explicit length the full string length is used
    $length = ($length === null) ? (int)self::strlen($str) : (int)$length;

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if (is_bool($encoding)) {
        $encoding = 'UTF-8';
      }

      return \mb_substr($str, $start, $length, $encoding);
    }

    // NOTE(review): the "iconv" flag guards a grapheme_*() call here - confirm this is intended
    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback: split into an array of characters, slice out the relevant part and join it again
    $characters = self::split($str);
    $selected = array_slice($characters, $start, $length);

    return implode($selected);
  }
5738
5739
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The main string is cut to ($offset, $length) first; the secondary string is then cut to the
   * length of that part, and both parts are compared.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison.
   * @param int     $length             The length of the comparison.
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int Result of self::strcasecmp() or self::strcmp() for the two parts.
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5759
5760 1
  /**
   * Count the number of substring occurrences
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack <p>
   *                           The string to search in
   *                           </p>
   * @param string   $needle   <p>
   *                           The substring to search for
   *                           </p>
   * @param int      $offset   [optional] <p>
   *                           The offset where to start counting
   *                           </p>
   * @param int|null $length   [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring; null (default) means "up to the end of the haystack".
   *                           </p>
   *
   * @return int Number of occurrences; 0 when haystack or needle is empty.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return 0;
    }

    if ($offset || $length !== null) {
      $offset = (int)$offset;

      // keep null as null: self::substr() reads it as "to the end", while a cast
      // to 0 would empty the haystack and always produce a count of 0
      if ($length !== null) {
        $length = (int)$length;
      }

      $haystack = self::substr($haystack, $offset, $length);
    }

    // init
    self::checkForSupport();

    // explicit encoding, so the result does not depend on mb_internal_encoding()
    return \mb_substr_count($haystack, $needle, 'UTF-8');
  }
5804
5805
  /**
   * Replace text within a portion of a string.
   *
   * When $str is an array, every element is processed on its own (recursive call per element) and
   * $replacement, $start and $length may be arrays that supply one value per element.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|array   $str         The input string, or a list of input strings.
   * @param string|array   $replacement The replacement; for a string $str only the first array entry is used.
   * @param int|array      $start       Character position where the replacement starts.
   * @param null|int|array $length      Number of characters to replace; for a string $str, null means
   *                                    "the whole length of $str".
   *
   * @return array|string The resulting string, or an array of results when $str is an array.
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // no input, no output; without this guard the padded scalar arguments
      // would make array_map() run once with an empty input string
      if ($num === 0) {
        return array();
      }

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: non-integer entries fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      // NOTE(review): a null $length becomes 0 per element here (pure insertion), while the
      // string form below treats null as "whole length" - confirm this difference is wanted
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            // non-integer entries fall back to the number of input strings
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // string input: only the first replacement of an array is used
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both strings into arrays of UTF-8 characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // init
      self::checkForSupport();

      // explicit encoding, so the result does not depend on mb_internal_encoding()
      $length = \mb_strlen((string)$str, 'UTF-8');
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // glue first: the legacy implode($array, $glue) order is no longer accepted by PHP 8
    return implode('', $smatches[0]);
  }
5882
5883
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change the character,
   * its lower-cased form is used instead.
   *
   * @param string $str      The input string; it is cleaned with self::clean() first.
   * @param string $encoding The encoding handed to the case conversion methods.
   *
   * @return string each character's case swapped; '' for empty input.
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $str = self::clean($str);

    $swapCharacter = function ($match) use ($encoding) {
      $character = $match[0];
      $upper = UTF8::strtoupper($character, $encoding);

      // unchanged by upper-casing => use the lower-case form instead
      return $character === $upper ? UTF8::strtolower($character, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapCharacter, $str);
  }
5917
5918
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Passed on unchanged as the substitution character.
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
5930
5931 20
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @see UTF8::to_latin1()
   *
   * @param string|array $str The input, passed on unchanged.
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
5942 20
5943 9
  /**
   * alias for "UTF8::to_utf8"
   *
   * @see UTF8::to_utf8()
   *
   * @param string|array $str The input, passed on unchanged.
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
5954 20
5955 20
  /**
   * convert to ASCII
   *
   * The string is cleaned with self::clean(); if it then contains any byte >= 0x80 it is normalized
   * to NFKC and every multi-byte character is transliterated on its own: first with iconv(), then
   * with the "translit_extra" data set, then with its NFD decomposition and finally with
   * self::str_transliterate().
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Replacement used when the NFD decomposition does not start with an
   *                          ASCII byte; also handed to self::str_transliterate().
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    // lazily loaded "translit_extra" table, shared between calls
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // only 7-bit bytes: nothing to transliterate
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = \Normalizer::normalize($s, \Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // the "s" modifier lets the dot match "\n" too; without it every
    // line break would silently disappear from the result
    preg_match_all('/./us', $s, $matches);
    $chars = isset($matches[0]) ? $matches[0] : array();

    /** @noinspection AlterInForeachInspection */
    foreach ($chars as &$c) {

      // single-byte characters are kept as they are
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if ($t !== false && is_string($t)) {
          if (!isset($t[0])) {
            // empty result: mark as "not transliterated"
            $t = '?';
          } elseif (isset($t[1])) {
            // multi-byte result: strip leading quote/accent characters
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          // use the first byte of the decomposed form if it is ASCII
          $t = \Normalizer::normalize($c, \Normalizer::NFD);

          if ($t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      // still unresolved (e.g. $subst_chr is "?"): last resort
      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference so later writes to $c cannot touch the array
    unset($c);

    return implode('', $chars);
  }
6034
6035
  /**
   * alias for "UTF8::to_win1252()"
   *
   * @see UTF8::to_win1252()
   *
   * @param string|array $str The input, passed on unchanged.
   *
   * @return array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6046
6047 1
  /**
   * alias for "UTF8::to_win1252()"
   *
   * @see UTF8::to_win1252()
   *
   * @param string|array $str The input, passed on unchanged.
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6058
6059
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte conversion, "\uXXXX" escape sequences and decimal HTML entities with two to
   * four digits ("&#NNNN;") are decoded as well.
   *
   * @param string|array $str Any string or array; arrays are converted element by element.
   *
   * @return string|array The same string (or array of strings), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = self::strlen($str, '8bit');

    $buf = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      // 7-bit byte: it doesn't need conversion
      if ($ordC1 < 0x80) {
        $buf .= $c1;
        continue;
      }

      if ($ordC1 >= 0xC0) {
        // looks like the lead byte of a 2, 3 or 4 byte UTF8 sequence:
        // work out how many continuation bytes have to follow
        if ($ordC1 <= 0xDF) {
          $expected = 1;
        } elseif ($ordC1 <= 0xEF) {
          $expected = 2;
        } elseif ($ordC1 <= 0xF7) {
          $expected = 3;
        } else {
          // 0xF8-0xFF never starts a sequence handled here
          $expected = 0;
        }

        $sequence = $c1;
        $isUtf8 = $expected > 0;
        for ($j = 1; $isUtf8 && $j <= $expected; $j++) {
          // bytes beyond the end of the string count as "not a continuation byte"
          $ordNext = ($i + $j < $max) ? ord($str[$i + $j]) : 0;

          if ($ordNext >= 0x80 && $ordNext <= 0xBF) {
            $sequence .= $str[$i + $j];
          } else {
            $isUtf8 = false;
          }
        }

        if ($isUtf8) {
          // yeah, almost sure it's UTF8 already: copy the whole sequence and skip its bytes
          $buf .= $sequence;
          $i += $expected;
          continue;
        }
      } elseif (isset(self::$win1252ToUtf8[$ordC1])) {
        // 0x80-0xBF and found in Windows-1252 special cases
        $buf .= self::$win1252ToUtf8[$ordC1];
        continue;
      }

      // not valid UTF8 - encode the single byte as a two byte UTF8 sequence
      // (integer shift instead of "/ 64": chr() must not receive a fractional float)
      $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6186 1
6187 1
  /**
   * Convert a string into "win1252"-encoding.
   *
   * Arrays are converted element by element (keys are kept); any other value is cast to a
   * string and handed to self::utf8_decode().
   *
   * @param  string|array $str
   *
   * @return string|array The converted value; '' for empty string input.
   */
  protected static function to_win1252($str)
  {
    if (is_array($str)) {

      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $str[$key] = self::to_win1252($value);
      }

      return $str;
    }

    $str = (string)$str;

    return isset($str[0]) ? self::utf8_decode($str) : '';
  }
6214
6215
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the native function if the string and the chars were plain ASCII (7-bit),
   * but checking for that costs more time than it would save here.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted or empty, separator
   *                         (\pZ) and "other" (\pC) characters are stripped on both ends.
   *
   * @return   string The trimmed string; '' for empty input.
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP, but it is a valid list of characters to strip
    $useDefaultChars = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6243
6244
  /**
   * Makes string's first char uppercase.
   *
   * Only the first character is passed to self::strtoupper(); the rest is appended unchanged.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $firstUpper = self::strtoupper(self::substr($str, 0, 1));
    $remainder = self::substr($str, 1);

    return $firstUpper . $remainder;
  }
6255
6256
  /**
   * alias for "UTF8::ucfirst"
   *
   * @see UTF8::ucfirst()
   *
   * @param string $str The input string, passed on unchanged.
   *
   * @return string
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
6267
6268
  /**
   * Uppercase for all words in the string.
   *
   * The string is split at single spaces; every word that is not listed in $exceptions gets its
   * first character uppercased. The first character of the re-joined string is always uppercased,
   * even if the first word is an exception.
   *
   * @param string $str        The input string.
   * @param array  $exceptions Words that are kept as they are (strict, case sensitive comparison).
   *
   * @return string The converted string; '' for empty input.
   */
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // same emptiness check as the sibling methods; a plain "!$str" would also swallow "0"
    if (!isset($str[0])) {
      return '';
    }

    // init
    $useExceptions = count($exceptions) > 0;
    $words = explode(' ', $str);

    foreach ($words as $index => $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $words[$index] = self::ucfirst($word);
      }
    }

    return self::ucfirst(implode(' ', $words));
  }
6309
6310
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str The (possibly multiple times) encoded input
   *
   * @return string The decoded string, or an empty string for empty input
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // first pass: native url-decoding, then rewrite "%uXXXX" sequences as hex entities
    $decoded = urldecode($str);
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $decoded);

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // $decoded is passed through to_utf8() before the entities are resolved
    $asUtf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($asUtf8, $flags);

    // second pass: raw url-decoding, followed by the simple UTF-8 repair
    $repaired = self::fix_simple_utf8(rawurldecode($withoutEntities));

    return (string)$repaired;
  }
6350
6351
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The keys are the percent-encoded bytes "%20" to "%FF", the values are the
   * matching characters as written in this (UTF-8 encoded) file.
   *
   * NOTE(review): the values for %7F, %81, %8D, %8F, %90, %9D, %A0 and %AD are
   * empty strings in this listing. They may have held control / invisible
   * characters originally (e.g. NO-BREAK SPACE for %A0) - verify against the
   * upstream file before relying on them.
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // Windows-1252 0x80 is the EURO SIGN (see self::$win1252ToUtf8[128]),
        // not a backtick.
        '%80' => "\xe2\x82\xac",
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6587
6588
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through UTF8::to_utf8(), then every sequence
   * listed as a key of self::$utf8ToWin1252 is replaced by its mapped value,
   * and the result is handed to Xml::utf8_decode().
   *
   * @param string $str The input string
   *
   * @return string The decoded string, or an empty string for empty input
   */
  public static function utf8_decode($str)
  {
    // search / replace lists derived from self::$utf8ToWin1252, built once per request
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    self::checkForSupport();

    $str = self::to_utf8($str);

    if (null === $search) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $str);

    return Xml::utf8_decode($mapped);
  }
6618 4
6619
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After PHP's native utf8_encode() has run, every sequence listed as a key
   * of self::$cp1252ToUtf8 is replaced by its mapped value. That replacement
   * is skipped when the encoded string contains no "\xC2" byte.
   *
   * @param string $str The input string
   *
   * @return string The encoded string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists derived from self::$cp1252ToUtf8, built on first use
    static $search = null;
    static $replace = null;

    $encoded = \utf8_encode($str);

    // without a "\xC2" byte there is nothing to replace
    if (false === strpos($encoded, "\xC2")) {
      return $encoded;
    }

    if (null === $search) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6645
6646
  /**
   * fix -> utf8-win1252 chars
   *
   * Use this function if you received an UTF-8 string that was converted from
   * Windows-1252 as if it were ISO-8859-1 (i.e. the Windows-1252 chars from
   * 80 to 9F were ignored).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str The input string
   *
   * @return  string The result of UTF8::fix_simple_utf8()
   */
  public static function utf8_fix_win1252_chars($str)
  {
    // kept for backward compatibility only, all work happens in fix_simple_utf8()
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6663
6664
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    // plain accessor for the static lookup table
    $table = self::$whitespaceTable;

    return $table;
  }
6678
6679
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. When the string holds
   * more than $words words, it is cut after the last allowed word, trailing
   * whitespace is removed via UTF8::rtrim() and $strAddOn is appended.
   *
   * @param  string $str      The input string
   * @param  int    $words    Maximum number of words to keep; values below 1 yield an empty string
   * @param  string $strAddOn Suffix appended only when the string was shortened
   *
   * @return string The original string if it is short enough (or nothing matched), otherwise the shortened one
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;

    if ($words < 1) {
      return '';
    }

    // optional leading whitespace, then between 1 and $words "word + whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    // shorten only if something matched and the match does not already span the whole string
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
6714
6715
  /**
   * Wraps a string to a given number of characters
   *
   * The break positions are computed by PHP's native wordwrap() on a
   * single-byte mask of the input: every character returned by UTF8::split()
   * becomes "?", spaces stay " " and already existing breaks become "#".
   * The positions found in that mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>
   *                      The input string.
   *                      </p>
   * @param int    $width [optional] <p>
   *                      The column width.
   *                      </p>
   * @param string $break [optional] <p>
   *                      The line is broken using the optional
   *                      break parameter.
   *                      </p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string the given string wrapped at the specified column, or an empty string
   *                if $str or $break is empty.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    $segments = explode($break, $str);

    if (1 === count($segments) && '' === $segments[0]) {
      return '';
    }

    // $chars holds the real characters / breaks, $mask the single-byte stand-in for each of them
    $chars = array();
    $mask = '';

    foreach ($segments as $index => $segment) {

      // every segment but the first one was preceded by a break in the input
      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;

        if (' ' === $char) {
          $mask .= ' ';
        } else {
          $mask .= '?';
        }
      }
    }

    // let the native function decide where the breaks ("#") go
    $mask = wordwrap($mask, $width, '#', $cut);

    $strReturn = '';
    $charIndex = 0;  // next unread entry of $chars
    $maskIndex = -1; // current position within $mask
    $breakPos = -1;  // position of the break currently handled

    while (true) {
      $breakPos = self::strpos($mask, '#', $breakPos + 1);

      if (false === $breakPos) {
        break;
      }

      // copy (and consume) everything in front of the break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $strReturn .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // the character replaced by the break (an old break or a space) is dropped
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $strReturn .= $break;
    }

    // whatever was not consumed belongs to the last line
    return $strReturn . implode('', $chars);
  }
6796
6797
  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    // plain accessor for the static whitespace list
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6806
6807
}
6808