Completed
Push — master ( af913d...9e7eff )
by Lars
03:37
created

UTF8::str_transliterate()   F

Complexity

Conditions 21
Paths 499

Size

Total Lines 92
Code Lines 50

Duplication

Lines 6
Ratio 6.52 %

Code Coverage

Tests 22
CRAP Score 43.5924

Importance

Changes 5
Bugs 1 Features 0
Metric Value
c 5
b 1
f 0
dl 6
loc 92
ccs 22
cts 35
cp 0.6286
rs 3.0197
cc 21
eloc 50
nc 499
nop 2
crap 43.5924

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Numeric code point => UTF-8 Character
83
   *
84
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
85
   *
86
   * @var array
87
   */
88
  protected static $whitespace = array(
89
    // NUL Byte
90
    0     => "\x0",
91
    // Tab
92
    9     => "\x9",
93
    // New Line
94
    10    => "\xa",
95
    // Vertical Tab
96
    11    => "\xb",
97
    // Carriage Return
98
    13    => "\xd",
99
    // Ordinary Space
100
    32    => "\x20",
101
    // NO-BREAK SPACE
102
    160   => "\xc2\xa0",
103
    // OGHAM SPACE MARK
104
    5760  => "\xe1\x9a\x80",
105
    // MONGOLIAN VOWEL SEPARATOR
106
    6158  => "\xe1\xa0\x8e",
107
    // EN QUAD
108
    8192  => "\xe2\x80\x80",
109
    // EM QUAD
110
    8193  => "\xe2\x80\x81",
111
    // EN SPACE
112
    8194  => "\xe2\x80\x82",
113
    // EM SPACE
114
    8195  => "\xe2\x80\x83",
115
    // THREE-PER-EM SPACE
116
    8196  => "\xe2\x80\x84",
117
    // FOUR-PER-EM SPACE
118
    8197  => "\xe2\x80\x85",
119
    // SIX-PER-EM SPACE
120
    8198  => "\xe2\x80\x86",
121
    // FIGURE SPACE
122
    8199  => "\xe2\x80\x87",
123
    // PUNCTUATION SPACE
124
    8200  => "\xe2\x80\x88",
125
    // THIN SPACE
126
    8201  => "\xe2\x80\x89",
127
    // HAIR SPACE
128
    8202  => "\xe2\x80\x8a",
129
    // LINE SEPARATOR
130
    8232  => "\xe2\x80\xa8",
131
    // PARAGRAPH SEPARATOR
132
    8233  => "\xe2\x80\xa9",
133
    // NARROW NO-BREAK SPACE
134
    8239  => "\xe2\x80\xaf",
135
    // MEDIUM MATHEMATICAL SPACE
136
    8287  => "\xe2\x81\x9f",
137
    // IDEOGRAPHIC SPACE
138
    12288 => "\xe3\x80\x80",
139
  );
140
141
  /**
142
   * @var array
143
   */
144
  protected static $whitespaceTable = array(
145
      'SPACE'                     => "\x20",
146
      'NO-BREAK SPACE'            => "\xc2\xa0",
147
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
148
      'EN QUAD'                   => "\xe2\x80\x80",
149
      'EM QUAD'                   => "\xe2\x80\x81",
150
      'EN SPACE'                  => "\xe2\x80\x82",
151
      'EM SPACE'                  => "\xe2\x80\x83",
152
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
153
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
154
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
155
      'FIGURE SPACE'              => "\xe2\x80\x87",
156
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
157
      'THIN SPACE'                => "\xe2\x80\x89",
158
      'HAIR SPACE'                => "\xe2\x80\x8a",
159
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
160
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
161
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
162
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
163
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
164
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
165
  );
166
167
  /**
168
   * Bidirectional text control characters (numeric code point => UTF-8 byte sequence)
169
   *
170
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
171
   *
172
   * @var array
173
   */
174
  protected static $bidiUniCodeControlsTable = array(
175
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
176
    8234 => "\xE2\x80\xAA",
177
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
178
    8235 => "\xE2\x80\xAB",
179
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
180
    8236 => "\xE2\x80\xAC",
181
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
182
    8237 => "\xE2\x80\xAD",
183
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
184
    8238 => "\xE2\x80\xAE",
185
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
186
    8294 => "\xE2\x81\xA6",
187
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
188
    8295 => "\xE2\x81\xA7",
189
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
190
    8296 => "\xE2\x81\xA8",
191
    // POP DIRECTIONAL ISOLATE
192
    8297 => "\xE2\x81\xA9",
193
  );
194
195
  /**
196
   * @var array
197
   */
198
  protected static $commonCaseFold = array(
199
      'ſ'            => 's',
200
      "\xCD\x85"     => 'ι',
201
      'ς'            => 'σ',
202
      "\xCF\x90"     => 'β',
203
      "\xCF\x91"     => 'θ',
204
      "\xCF\x95"     => 'φ',
205
      "\xCF\x96"     => 'π',
206
      "\xCF\xB0"     => 'κ',
207
      "\xCF\xB1"     => 'ρ',
208
      "\xCF\xB5"     => 'ε',
209
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
210
      "\xE1\xBE\xBE" => 'ι',
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $brokenUtf8ToUtf8 = array(
217
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
218
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
219
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
220
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
221
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
222
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
223
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
224
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
225
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
226
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
227
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
228
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
229
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
230
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
231
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
232
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
233
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
234
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
235
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
236
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
237
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
238
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
239
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
240
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
241
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
242
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
243
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
244
      'ü'       => 'ü',
245
      'ä'       => 'ä',
246
      'ö'       => 'ö',
247
      'Ö'       => 'Ö',
248
      'ß'       => 'ß',
249
      'Ã '       => 'à',
250
      'á'       => 'á',
251
      'â'       => 'â',
252
      'ã'       => 'ã',
253
      'ù'       => 'ù',
254
      'ú'       => 'ú',
255
      'û'       => 'û',
256
      'Ù'       => 'Ù',
257
      'Ú'       => 'Ú',
258
      'Û'       => 'Û',
259
      'Ü'       => 'Ü',
260
      'ò'       => 'ò',
261
      'ó'       => 'ó',
262
      'ô'       => 'ô',
263
      'è'       => 'è',
264
      'é'       => 'é',
265
      'ê'       => 'ê',
266
      'ë'       => 'ë',
267
      'À'       => 'À',
268
      'Á'       => 'Á',
269
      'Â'       => 'Â',
270
      'Ã'       => 'Ã',
271
      'Ä'       => 'Ä',
272
      'Ã…'       => 'Å',
273
      'Ç'       => 'Ç',
274
      'È'       => 'È',
275
      'É'       => 'É',
276
      'Ê'       => 'Ê',
277
      'Ë'       => 'Ë',
278
      'ÃŒ'       => 'Ì',
279
      'Í'       => 'Í',
280
      'ÃŽ'       => 'Î',
281
      'Ï'       => 'Ï',
282
      'Ñ'       => 'Ñ',
283
      'Ã’'       => 'Ò',
284
      'Ó'       => 'Ó',
285
      'Ô'       => 'Ô',
286
      'Õ'       => 'Õ',
287
      'Ø'       => 'Ø',
288
      'Ã¥'       => 'å',
289
      'æ'       => 'æ',
290
      'ç'       => 'ç',
291
      'ì'       => 'ì',
292
      'í'       => 'í',
293
      'î'       => 'î',
294
      'ï'       => 'ï',
295
      'ð'       => 'ð',
296
      'ñ'       => 'ñ',
297
      'õ'       => 'õ',
298
      'ø'       => 'ø',
299
      'ý'       => 'ý',
300
      'ÿ'       => 'ÿ',
301
      '€'      => '€',
302
  );
303
304
  /**
305
   * @var array
306
   */
307
  protected static $utf8ToWin1252 = array(
308
      "\xe2\x82\xac" => "\x80", // EURO SIGN
309
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
310
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
311
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
312
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
313
      "\xe2\x80\xa0" => "\x86", // DAGGER
314
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
315
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
316
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
317
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
318
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
319
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
320
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
321
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
322
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
323
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
324
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
325
      "\xe2\x80\xa2" => "\x95", // BULLET
326
      "\xe2\x80\x93" => "\x96", // EN DASH
327
      "\xe2\x80\x94" => "\x97", // EM DASH
328
      "\xcb\x9c"     => "\x98", // SMALL TILDE
329
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
330
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
331
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
332
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
333
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
334
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
335
  );
336
337
  /**
338
   * @var array
339
   */
340
  protected static $utf8MSWord = array(
341
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
342
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
343
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
344
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
345
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
346
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
347
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
348
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
349
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
350
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
351
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
352
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
353
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
354
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
355
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
356
  );
357
358
  protected static $iconvEncoding = array(
359
      'ANSI_X3.4-1968',
360
      'ANSI_X3.4-1986',
361
      'ASCII',
362
      'CP367',
363
      'IBM367',
364
      'ISO-IR-6',
365
      'ISO646-US',
366
      'ISO_646.IRV:1991',
367
      'US',
368
      'US-ASCII',
369
      'CSASCII',
370
      'UTF-8',
371
      'ISO-10646-UCS-2',
372
      'UCS-2',
373
      'CSUNICODE',
374
      'UCS-2BE',
375
      'UNICODE-1-1',
376
      'UNICODEBIG',
377
      'CSUNICODE11',
378
      'UCS-2LE',
379
      'UNICODELITTLE',
380
      'ISO-10646-UCS-4',
381
      'UCS-4',
382
      'CSUCS4',
383
      'UCS-4BE',
384
      'UCS-4LE',
385
      'UTF-16',
386
      'UTF-16BE',
387
      'UTF-16LE',
388
      'UTF-32',
389
      'UTF-32BE',
390
      'UTF-32LE',
391
      'UNICODE-1-1-UTF-7',
392
      'UTF-7',
393
      'CSUNICODE11UTF7',
394
      'UCS-2-INTERNAL',
395
      'UCS-2-SWAPPED',
396
      'UCS-4-INTERNAL',
397
      'UCS-4-SWAPPED',
398
      'C99',
399
      'JAVA',
400
      'CP819',
401
      'IBM819',
402
      'ISO-8859-1',
403
      'ISO-IR-100',
404
      'ISO8859-1',
405
      'ISO_8859-1',
406
      'ISO_8859-1:1987',
407
      'L1',
408
      'LATIN1',
409
      'CSISOLATIN1',
410
      'ISO-8859-2',
411
      'ISO-IR-101',
412
      'ISO8859-2',
413
      'ISO_8859-2',
414
      'ISO_8859-2:1987',
415
      'L2',
416
      'LATIN2',
417
      'CSISOLATIN2',
418
      'ISO-8859-3',
419
      'ISO-IR-109',
420
      'ISO8859-3',
421
      'ISO_8859-3',
422
      'ISO_8859-3:1988',
423
      'L3',
424
      'LATIN3',
425
      'CSISOLATIN3',
426
      'ISO-8859-4',
427
      'ISO-IR-110',
428
      'ISO8859-4',
429
      'ISO_8859-4',
430
      'ISO_8859-4:1988',
431
      'L4',
432
      'LATIN4',
433
      'CSISOLATIN4',
434
      'CYRILLIC',
435
      'ISO-8859-5',
436
      'ISO-IR-144',
437
      'ISO8859-5',
438
      'ISO_8859-5',
439
      'ISO_8859-5:1988',
440
      'CSISOLATINCYRILLIC',
441
      'ARABIC',
442
      'ASMO-708',
443
      'ECMA-114',
444
      'ISO-8859-6',
445
      'ISO-IR-127',
446
      'ISO8859-6',
447
      'ISO_8859-6',
448
      'ISO_8859-6:1987',
449
      'CSISOLATINARABIC',
450
      'ECMA-118',
451
      'ELOT_928',
452
      'GREEK',
453
      'GREEK8',
454
      'ISO-8859-7',
455
      'ISO-IR-126',
456
      'ISO8859-7',
457
      'ISO_8859-7',
458
      'ISO_8859-7:1987',
459
      'ISO_8859-7:2003',
460
      'CSISOLATINGREEK',
461
      'HEBREW',
462
      'ISO-8859-8',
463
      'ISO-IR-138',
464
      'ISO8859-8',
465
      'ISO_8859-8',
466
      'ISO_8859-8:1988',
467
      'CSISOLATINHEBREW',
468
      'ISO-8859-9',
469
      'ISO-IR-148',
470
      'ISO8859-9',
471
      'ISO_8859-9',
472
      'ISO_8859-9:1989',
473
      'L5',
474
      'LATIN5',
475
      'CSISOLATIN5',
476
      'ISO-8859-10',
477
      'ISO-IR-157',
478
      'ISO8859-10',
479
      'ISO_8859-10',
480
      'ISO_8859-10:1992',
481
      'L6',
482
      'LATIN6',
483
      'CSISOLATIN6',
484
      'ISO-8859-11',
485
      'ISO8859-11',
486
      'ISO_8859-11',
487
      'ISO-8859-13',
488
      'ISO-IR-179',
489
      'ISO8859-13',
490
      'ISO_8859-13',
491
      'L7',
492
      'LATIN7',
493
      'ISO-8859-14',
494
      'ISO-CELTIC',
495
      'ISO-IR-199',
496
      'ISO8859-14',
497
      'ISO_8859-14',
498
      'ISO_8859-14:1998',
499
      'L8',
500
      'LATIN8',
501
      'ISO-8859-15',
502
      'ISO-IR-203',
503
      'ISO8859-15',
504
      'ISO_8859-15',
505
      'ISO_8859-15:1998',
506
      'LATIN-9',
507
      'ISO-8859-16',
508
      'ISO-IR-226',
509
      'ISO8859-16',
510
      'ISO_8859-16',
511
      'ISO_8859-16:2001',
512
      'L10',
513
      'LATIN10',
514
      'KOI8-R',
515
      'CSKOI8R',
516
      'KOI8-U',
517
      'KOI8-RU',
518
      'CP1250',
519
      'MS-EE',
520
      'WINDOWS-1250',
521
      'CP1251',
522
      'MS-CYRL',
523
      'WINDOWS-1251',
524
      'CP1252',
525
      'MS-ANSI',
526
      'WINDOWS-1252',
527
      'CP1253',
528
      'MS-GREEK',
529
      'WINDOWS-1253',
530
      'CP1254',
531
      'MS-TURK',
532
      'WINDOWS-1254',
533
      'CP1255',
534
      'MS-HEBR',
535
      'WINDOWS-1255',
536
      'CP1256',
537
      'MS-ARAB',
538
      'WINDOWS-1256',
539
      'CP1257',
540
      'WINBALTRIM',
541
      'WINDOWS-1257',
542
      'CP1258',
543
      'WINDOWS-1258',
544
      '850',
545
      'CP850',
546
      'IBM850',
547
      'CSPC850MULTILINGUAL',
548
      '862',
549
      'CP862',
550
      'IBM862',
551
      'CSPC862LATINHEBREW',
552
      '866',
553
      'CP866',
554
      'IBM866',
555
      'CSIBM866',
556
      'MAC',
557
      'MACINTOSH',
558
      'MACROMAN',
559
      'CSMACINTOSH',
560
      'MACCENTRALEUROPE',
561
      'MACICELAND',
562
      'MACCROATIAN',
563
      'MACROMANIA',
564
      'MACCYRILLIC',
565
      'MACUKRAINE',
566
      'MACGREEK',
567
      'MACTURKISH',
568
      'MACHEBREW',
569
      'MACARABIC',
570
      'MACTHAI',
571
      'HP-ROMAN8',
572
      'R8',
573
      'ROMAN8',
574
      'CSHPROMAN8',
575
      'NEXTSTEP',
576
      'ARMSCII-8',
577
      'GEORGIAN-ACADEMY',
578
      'GEORGIAN-PS',
579
      'KOI8-T',
580
      'CP154',
581
      'CYRILLIC-ASIAN',
582
      'PT154',
583
      'PTCP154',
584
      'CSPTCP154',
585
      'KZ-1048',
586
      'RK1048',
587
      'STRK1048-2002',
588
      'CSKZ1048',
589
      'MULELAO-1',
590
      'CP1133',
591
      'IBM-CP1133',
592
      'ISO-IR-166',
593
      'TIS-620',
594
      'TIS620',
595
      'TIS620-0',
596
      'TIS620.2529-1',
597
      'TIS620.2533-0',
598
      'TIS620.2533-1',
599
      'CP874',
600
      'WINDOWS-874',
601
      'VISCII',
602
      'VISCII1.1-1',
603
      'CSVISCII',
604
      'TCVN',
605
      'TCVN-5712',
606
      'TCVN5712-1',
607
      'TCVN5712-1:1993',
608
      'ISO-IR-14',
609
      'ISO646-JP',
610
      'JIS_C6220-1969-RO',
611
      'JP',
612
      'CSISO14JISC6220RO',
613
      'JISX0201-1976',
614
      'JIS_X0201',
615
      'X0201',
616
      'CSHALFWIDTHKATAKANA',
617
      'ISO-IR-87',
618
      'JIS0208',
619
      'JIS_C6226-1983',
620
      'JIS_X0208',
621
      'JIS_X0208-1983',
622
      'JIS_X0208-1990',
623
      'X0208',
624
      'CSISO87JISX0208',
625
      'ISO-IR-159',
626
      'JIS_X0212',
627
      'JIS_X0212-1990',
628
      'JIS_X0212.1990-0',
629
      'X0212',
630
      'CSISO159JISX02121990',
631
      'CN',
632
      'GB_1988-80',
633
      'ISO-IR-57',
634
      'ISO646-CN',
635
      'CSISO57GB1988',
636
      'CHINESE',
637
      'GB_2312-80',
638
      'ISO-IR-58',
639
      'CSISO58GB231280',
640
      'CN-GB-ISOIR165',
641
      'ISO-IR-165',
642
      'ISO-IR-149',
643
      'KOREAN',
644
      'KSC_5601',
645
      'KS_C_5601-1987',
646
      'KS_C_5601-1989',
647
      'CSKSC56011987',
648
      'EUC-JP',
649
      'EUCJP',
650
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
651
      'CSEUCPKDFMTJAPANESE',
652
      'MS_KANJI',
653
      'SHIFT-JIS',
654
      'SHIFT_JIS',
655
      'SJIS',
656
      'CSSHIFTJIS',
657
      'CP932',
658
      'ISO-2022-JP',
659
      'CSISO2022JP',
660
      'ISO-2022-JP-1',
661
      'ISO-2022-JP-2',
662
      'CSISO2022JP2',
663
      'CN-GB',
664
      'EUC-CN',
665
      'EUCCN',
666
      'GB2312',
667
      'CSGB2312',
668
      'GBK',
669
      'CP936',
670
      'MS936',
671
      'WINDOWS-936',
672
      'GB18030',
673
      'ISO-2022-CN',
674
      'CSISO2022CN',
675
      'ISO-2022-CN-EXT',
676
      'HZ',
677
      'HZ-GB-2312',
678
      'EUC-TW',
679
      'EUCTW',
680
      'CSEUCTW',
681
      'BIG-5',
682
      'BIG-FIVE',
683
      'BIG5',
684
      'BIGFIVE',
685
      'CN-BIG5',
686
      'CSBIG5',
687
      'CP950',
688
      'BIG5-HKSCS:1999',
689
      'BIG5-HKSCS:2001',
690
      'BIG5-HKSCS',
691
      'BIG5-HKSCS:2004',
692
      'BIG5HKSCS',
693
      'EUC-KR',
694
      'EUCKR',
695
      'CSEUCKR',
696
      'CP949',
697
      'UHC',
698
      'CP1361',
699
      'JOHAB',
700
      'ISO-2022-KR',
701
      'CSISO2022KR',
702
      'CP856',
703
      'CP922',
704
      'CP943',
705
      'CP1046',
706
      'CP1124',
707
      'CP1129',
708
      'CP1161',
709
      'IBM-1161',
710
      'IBM1161',
711
      'CSIBM1161',
712
      'CP1162',
713
      'IBM-1162',
714
      'IBM1162',
715
      'CSIBM1162',
716
      'CP1163',
717
      'IBM-1163',
718
      'IBM1163',
719
      'CSIBM1163',
720
      'DEC-KANJI',
721
      'DEC-HANYU',
722
      '437',
723
      'CP437',
724
      'IBM437',
725
      'CSPC8CODEPAGE437',
726
      'CP737',
727
      'CP775',
728
      'IBM775',
729
      'CSPC775BALTIC',
730
      '852',
731
      'CP852',
732
      'IBM852',
733
      'CSPCP852',
734
      'CP853',
735
      '855',
736
      'CP855',
737
      'IBM855',
738
      'CSIBM855',
739
      '857',
740
      'CP857',
741
      'IBM857',
742
      'CSIBM857',
743
      'CP858',
744
      '860',
745
      'CP860',
746
      'IBM860',
747
      'CSIBM860',
748
      '861',
749
      'CP-IS',
750
      'CP861',
751
      'IBM861',
752
      'CSIBM861',
753
      '863',
754
      'CP863',
755
      'IBM863',
756
      'CSIBM863',
757
      'CP864',
758
      'IBM864',
759
      'CSIBM864',
760
      '865',
761
      'CP865',
762
      'IBM865',
763
      'CSIBM865',
764
      '869',
765
      'CP-GR',
766
      'CP869',
767
      'IBM869',
768
      'CSIBM869',
769
      'CP1125',
770
      'EUC-JISX0213',
771
      'SHIFT_JISX0213',
772
      'ISO-2022-JP-3',
773
      'BIG5-2003',
774
      'ISO-IR-230',
775
      'TDS565',
776
      'ATARI',
777
      'ATARIST',
778
      'RISCOS-LATIN1',
779
  );
780
781
  /**
782
   * @var array
783
   */
784
  private static $support = array();
785
786
  /**
787
   * Calls self::checkForSupport() whenever an instance is created.
788
   */
789
  public function __construct()
790 1
  {
791
    // Delegate to the static checkForSupport() (defined outside this view) on every instantiation.
    self::checkForSupport();
792 1
  }
793 1
794
  /**
795
   * Returns a single UTF-8 character from string.
796
   *
797
   * @param    string $str A UTF-8 string.
798
   * @param    int    $pos The position of character to return.
799
   *
800
   * @return   string Single Multi-Byte character.
801
   */
802
  public static function access($str, $pos)
803 1
  {
804
    // Return the character at the specified position, similar to $str[$pos],
    // by asking self::substr() for a slice of length 1 starting at $pos.
805
806
    return self::substr($str, $pos, 1);
807 1
  }
808
809
  /**
810
   * Prepends BOM character to the string and returns the whole string.
811
   *
812
   * INFO: If the string already starts with a BOM, the input string is returned unchanged.
813
   *
814
   * @param    string $str The input string
815
   *
816
   * @return   string The output string that contains BOM
817
   */
818
  public static function add_bom_to_string($str)
819
  {
820
    // Only prepend when the leading 3-unit prefix taken by substr() is not
    // already recognised as a BOM by self::is_bom(); the BOM returned by
    // self::bom() is three bytes long.
    if (!self::is_bom(substr($str, 0, 3))) {
821
      $str = self::bom() . $str;
822
    }
823
824
    // Either the untouched input (BOM already present) or the BOM-prefixed string.
    return $str;
825
  }
826
827
  /**
828
   * Returns the Byte Order Mark Character.
829
   *
830
   * @return   string Byte Order Mark
831
   */
832
  public static function bom()
833 2
  {
834
    // The three bytes EF BB BF: the UTF-8 encoding of U+FEFF.
    return "\xEF\xBB\xBF";
835 2
  }
836
837
  /**
838
   * @alias of UTF8::chr_map()
839
   *
840
   * @param $callback Forwarded unchanged as the first argument to UTF8::chr_map().
841
   * @param $str      Forwarded unchanged as the second argument to UTF8::chr_map().
842
   *
843
   * @return array
844
   */
845
  public static function callback($callback, $str)
846 1
  {
847
    // Pure alias: forward both arguments unchanged to chr_map() and return its result.
    return self::chr_map($callback, $str);
848 1
  }
849
850
  /**
851
   * Returns an array of all lower and upper case UTF-8 encoded characters.
852
   *
853
   * @return   array An array with lower case chars as keys and upper chars as values.
854
   */
855
  protected static function case_table()
856
  {
857
    static $case = array(
858
859
      // lower => upper
860
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
861
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
862
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
863
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
864
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
865
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
866
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
867
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
868
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
869
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
870
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
871
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
872
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
873
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
874
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
875
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
876
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
877
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
878
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
879
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
880
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
881
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
882
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
883
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
884
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
885
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
886
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
887
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
888
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
889
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
890
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
891
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
892
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
893
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
894
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
895
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
896
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
897
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
898
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
899
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
900
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
901
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
902
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
903
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
904
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
905
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
906
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
907
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
908
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
909
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
910
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
911
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
912
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
913
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
914
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
915
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
916
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
917
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
918
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
919
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
920
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
921
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
922
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
923
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
924
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
925
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
926
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
927
      "\xea\x9e\x87"     => "\xea\x9e\x86",
928
      "\xea\x9e\x85"     => "\xea\x9e\x84",
929
      "\xea\x9e\x83"     => "\xea\x9e\x82",
930
      "\xea\x9e\x81"     => "\xea\x9e\x80",
931
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
932
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
933
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
934
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
935
      "\xea\x9d\xad"     => "\xea\x9d\xac",
936
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
937
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
938
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
939
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
940
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
941
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
942
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
943
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
944
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
945
      "\xea\x9d\x99"     => "\xea\x9d\x98",
946
      "\xea\x9d\x97"     => "\xea\x9d\x96",
947
      "\xea\x9d\x95"     => "\xea\x9d\x94",
948
      "\xea\x9d\x93"     => "\xea\x9d\x92",
949
      "\xea\x9d\x91"     => "\xea\x9d\x90",
950
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
951
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
952
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
953
      "\xea\x9d\x89"     => "\xea\x9d\x88",
954
      "\xea\x9d\x87"     => "\xea\x9d\x86",
955
      "\xea\x9d\x85"     => "\xea\x9d\x84",
956
      "\xea\x9d\x83"     => "\xea\x9d\x82",
957
      "\xea\x9d\x81"     => "\xea\x9d\x80",
958
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
959
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
960
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
961
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
962
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
963
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
964
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
965
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
966
      "\xea\x9c\xad"     => "\xea\x9c\xac",
967
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
968
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
969
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
970
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
971
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
972
      "\xea\x9a\x97"     => "\xea\x9a\x96",
973
      "\xea\x9a\x95"     => "\xea\x9a\x94",
974
      "\xea\x9a\x93"     => "\xea\x9a\x92",
975
      "\xea\x9a\x91"     => "\xea\x9a\x90",
976
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
977
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
978
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
979
      "\xea\x9a\x89"     => "\xea\x9a\x88",
980
      "\xea\x9a\x87"     => "\xea\x9a\x86",
981
      "\xea\x9a\x85"     => "\xea\x9a\x84",
982
      "\xea\x9a\x83"     => "\xea\x9a\x82",
983
      "\xea\x9a\x81"     => "\xea\x9a\x80",
984
      "\xea\x99\xad"     => "\xea\x99\xac",
985
      "\xea\x99\xab"     => "\xea\x99\xaa",
986
      "\xea\x99\xa9"     => "\xea\x99\xa8",
987
      "\xea\x99\xa7"     => "\xea\x99\xa6",
988
      "\xea\x99\xa5"     => "\xea\x99\xa4",
989
      "\xea\x99\xa3"     => "\xea\x99\xa2",
990
      "\xea\x99\x9f"     => "\xea\x99\x9e",
991
      "\xea\x99\x9d"     => "\xea\x99\x9c",
992
      "\xea\x99\x9b"     => "\xea\x99\x9a",
993
      "\xea\x99\x99"     => "\xea\x99\x98",
994
      "\xea\x99\x97"     => "\xea\x99\x96",
995
      "\xea\x99\x95"     => "\xea\x99\x94",
996
      "\xea\x99\x93"     => "\xea\x99\x92",
997
      "\xea\x99\x91"     => "\xea\x99\x90",
998
      "\xea\x99\x8f"     => "\xea\x99\x8e",
999
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1000
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1001
      "\xea\x99\x89"     => "\xea\x99\x88",
1002
      "\xea\x99\x87"     => "\xea\x99\x86",
1003
      "\xea\x99\x85"     => "\xea\x99\x84",
1004
      "\xea\x99\x83"     => "\xea\x99\x82",
1005
      "\xea\x99\x81"     => "\xea\x99\x80",
1006
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1007
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1008
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1009
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1010
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1011
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1012
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1013
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1014
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1015
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1016
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1017
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1018
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1019
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1020
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1021
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1022
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1023
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1024
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1025
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1026
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1027
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1028
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1029
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1030
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1031
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1032
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1033
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1034
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1035
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1036
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1037
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1038
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1039
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1040
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1041
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1042
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1043
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1044
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1045
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1046
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1047
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1048
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1049
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1050
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1051
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1052
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1053
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1054
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1055
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1056
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1057
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1058
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1059
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1060
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1061
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1062
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1063
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1064
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1065
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1066
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1067
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1068
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1069
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1070
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1071
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1072
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1073
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1074
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1075
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1076
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1077
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1078
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1079
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1080
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1081
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1082
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1083
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1084
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1085
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1086
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1087
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1088
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1089
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1090
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1091
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1092
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1093
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1094
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1095
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1096
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1097
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1098
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1099
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1100
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1101
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1102
      "\xe2\xb1\xa5"     => "\xc8\xba",
1103
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1104
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1105
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1106
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1107
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1108
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1109
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1110
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1111
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1112
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1113
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1114
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1115
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1116
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1117
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1118
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1119
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1120
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1121
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1122
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1123
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1124
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1125
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1126
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1127
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1128
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1129
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1130
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1131
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1132
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1133
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1134
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1135
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1136
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1137
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1138
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1139
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1140
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1141
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1142
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1143
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1144
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1145
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1146
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1147
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1148
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1149
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1150
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1151
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1152
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1153
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1154
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1155
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1156
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1157
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1158
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1159
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1160
      "\xe1\xbe\xbe"     => "\xce\x99",
1161
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1162
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1163
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1164
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1165
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1166
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1167
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1168
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1169
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1170
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1171
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1172
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1173
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1174
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1175
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1176
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1177
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1178
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1179
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1180
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1181
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1182
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1183
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1184
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1185
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1186
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1187
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1188
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1189
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1190
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1191
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1192
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1193
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1194
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1195
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1196
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1197
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1198
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1199
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1200
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1201
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1202
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1203
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1204
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1205
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1206
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1207
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1208
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1209
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1210
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1211
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1212
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1213
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1214
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1215
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1216
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1217
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1218
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1219
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1220
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1221
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1222
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1223
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1224
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1225
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1226
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1227
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1228
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1229
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1230
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1231
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1232
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1233
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1234
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1235
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1236
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1237
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1238
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1239
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1240
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1241
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1242
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1243
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1244
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1245
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1246
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1247
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1248
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1249
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1250
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1251
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1252
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1253
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1254
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1255
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1256
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1257
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1258
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1259
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1260
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1261
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1262
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1263
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1264
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1265
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1266
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1267
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1268
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1269
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1270
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1271
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1272
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1273
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1274
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1275
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1276
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1277
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1278
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1279
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1280
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1281
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1282
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1283
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1284
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1285
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1286
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1287
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1288
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1289
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1290
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1291
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1292
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1293
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1294
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1295
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1296
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1297
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1298
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1299
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1300
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1301
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1302
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1303
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1304
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1305
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1306
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1307
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1308
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1309
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1310
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1311
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1312
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1313
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1314
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1315
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1316
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1317
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1318
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1319
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1320
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1321
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1322
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1323
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1324
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1325
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1326
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1327
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1328
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1329
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1330
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1331
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1332
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1333
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1334
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1335
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1336
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1337
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1338
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1339
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1340
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1341
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1342
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1343
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1344
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1345
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1346
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1347
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1348
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1349
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1350
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1351
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1352
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1353
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1354
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1355
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1356
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1357
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1358
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1359
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1360
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1361
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1362
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1363
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1364
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1365
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1366
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1367
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1368
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1369
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1370
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1371
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1372
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1373
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1374
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1375
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1376
      "\xd6\x86"         => "\xd5\x96",
1377
      "\xd6\x85"         => "\xd5\x95",
1378
      "\xd6\x84"         => "\xd5\x94",
1379
      "\xd6\x83"         => "\xd5\x93",
1380
      "\xd6\x82"         => "\xd5\x92",
1381
      "\xd6\x81"         => "\xd5\x91",
1382
      "\xd6\x80"         => "\xd5\x90",
1383
      "\xd5\xbf"         => "\xd5\x8f",
1384
      "\xd5\xbe"         => "\xd5\x8e",
1385
      "\xd5\xbd"         => "\xd5\x8d",
1386
      "\xd5\xbc"         => "\xd5\x8c",
1387
      "\xd5\xbb"         => "\xd5\x8b",
1388
      "\xd5\xba"         => "\xd5\x8a",
1389
      "\xd5\xb9"         => "\xd5\x89",
1390
      "\xd5\xb8"         => "\xd5\x88",
1391
      "\xd5\xb7"         => "\xd5\x87",
1392
      "\xd5\xb6"         => "\xd5\x86",
1393
      "\xd5\xb5"         => "\xd5\x85",
1394
      "\xd5\xb4"         => "\xd5\x84",
1395
      "\xd5\xb3"         => "\xd5\x83",
1396
      "\xd5\xb2"         => "\xd5\x82",
1397
      "\xd5\xb1"         => "\xd5\x81",
1398
      "\xd5\xb0"         => "\xd5\x80",
1399
      "\xd5\xaf"         => "\xd4\xbf",
1400
      "\xd5\xae"         => "\xd4\xbe",
1401
      "\xd5\xad"         => "\xd4\xbd",
1402
      "\xd5\xac"         => "\xd4\xbc",
1403
      "\xd5\xab"         => "\xd4\xbb",
1404
      "\xd5\xaa"         => "\xd4\xba",
1405
      "\xd5\xa9"         => "\xd4\xb9",
1406
      "\xd5\xa8"         => "\xd4\xb8",
1407
      "\xd5\xa7"         => "\xd4\xb7",
1408
      "\xd5\xa6"         => "\xd4\xb6",
1409
      "\xd5\xa5"         => "\xd4\xb5",
1410
      "\xd5\xa4"         => "\xd4\xb4",
1411
      "\xd5\xa3"         => "\xd4\xb3",
1412
      "\xd5\xa2"         => "\xd4\xb2",
1413
      "\xd5\xa1"         => "\xd4\xb1",
1414
      "\xd4\xa5"         => "\xd4\xa4",
1415
      "\xd4\xa3"         => "\xd4\xa2",
1416
      "\xd4\xa1"         => "\xd4\xa0",
1417
      "\xd4\x9f"         => "\xd4\x9e",
1418
      "\xd4\x9d"         => "\xd4\x9c",
1419
      "\xd4\x9b"         => "\xd4\x9a",
1420
      "\xd4\x99"         => "\xd4\x98",
1421
      "\xd4\x97"         => "\xd4\x96",
1422
      "\xd4\x95"         => "\xd4\x94",
1423
      "\xd4\x93"         => "\xd4\x92",
1424
      "\xd4\x91"         => "\xd4\x90",
1425
      "\xd4\x8f"         => "\xd4\x8e",
1426
      "\xd4\x8d"         => "\xd4\x8c",
1427
      "\xd4\x8b"         => "\xd4\x8a",
1428
      "\xd4\x89"         => "\xd4\x88",
1429
      "\xd4\x87"         => "\xd4\x86",
1430
      "\xd4\x85"         => "\xd4\x84",
1431
      "\xd4\x83"         => "\xd4\x82",
1432
      "\xd4\x81"         => "\xd4\x80",
1433
      "\xd3\xbf"         => "\xd3\xbe",
1434
      "\xd3\xbd"         => "\xd3\xbc",
1435
      "\xd3\xbb"         => "\xd3\xba",
1436
      "\xd3\xb9"         => "\xd3\xb8",
1437
      "\xd3\xb7"         => "\xd3\xb6",
1438
      "\xd3\xb5"         => "\xd3\xb4",
1439
      "\xd3\xb3"         => "\xd3\xb2",
1440
      "\xd3\xb1"         => "\xd3\xb0",
1441
      "\xd3\xaf"         => "\xd3\xae",
1442
      "\xd3\xad"         => "\xd3\xac",
1443
      "\xd3\xab"         => "\xd3\xaa",
1444
      "\xd3\xa9"         => "\xd3\xa8",
1445
      "\xd3\xa7"         => "\xd3\xa6",
1446
      "\xd3\xa5"         => "\xd3\xa4",
1447
      "\xd3\xa3"         => "\xd3\xa2",
1448
      "\xd3\xa1"         => "\xd3\xa0",
1449
      "\xd3\x9f"         => "\xd3\x9e",
1450
      "\xd3\x9d"         => "\xd3\x9c",
1451
      "\xd3\x9b"         => "\xd3\x9a",
1452
      "\xd3\x99"         => "\xd3\x98",
1453
      "\xd3\x97"         => "\xd3\x96",
1454
      "\xd3\x95"         => "\xd3\x94",
1455
      "\xd3\x93"         => "\xd3\x92",
1456
      "\xd3\x91"         => "\xd3\x90",
1457
      "\xd3\x8f"         => "\xd3\x80",
1458
      "\xd3\x8e"         => "\xd3\x8d",
1459
      "\xd3\x8c"         => "\xd3\x8b",
1460
      "\xd3\x8a"         => "\xd3\x89",
1461
      "\xd3\x88"         => "\xd3\x87",
1462
      "\xd3\x86"         => "\xd3\x85",
1463
      "\xd3\x84"         => "\xd3\x83",
1464
      "\xd3\x82"         => "\xd3\x81",
1465
      "\xd2\xbf"         => "\xd2\xbe",
1466
      "\xd2\xbd"         => "\xd2\xbc",
1467
      "\xd2\xbb"         => "\xd2\xba",
1468
      "\xd2\xb9"         => "\xd2\xb8",
1469
      "\xd2\xb7"         => "\xd2\xb6",
1470
      "\xd2\xb5"         => "\xd2\xb4",
1471
      "\xd2\xb3"         => "\xd2\xb2",
1472
      "\xd2\xb1"         => "\xd2\xb0",
1473
      "\xd2\xaf"         => "\xd2\xae",
1474
      "\xd2\xad"         => "\xd2\xac",
1475
      "\xd2\xab"         => "\xd2\xaa",
1476
      "\xd2\xa9"         => "\xd2\xa8",
1477
      "\xd2\xa7"         => "\xd2\xa6",
1478
      "\xd2\xa5"         => "\xd2\xa4",
1479
      "\xd2\xa3"         => "\xd2\xa2",
1480
      "\xd2\xa1"         => "\xd2\xa0",
1481
      "\xd2\x9f"         => "\xd2\x9e",
1482
      "\xd2\x9d"         => "\xd2\x9c",
1483
      "\xd2\x9b"         => "\xd2\x9a",
1484
      "\xd2\x99"         => "\xd2\x98",
1485
      "\xd2\x97"         => "\xd2\x96",
1486
      "\xd2\x95"         => "\xd2\x94",
1487
      "\xd2\x93"         => "\xd2\x92",
1488
      "\xd2\x91"         => "\xd2\x90",
1489
      "\xd2\x8f"         => "\xd2\x8e",
1490
      "\xd2\x8d"         => "\xd2\x8c",
1491
      "\xd2\x8b"         => "\xd2\x8a",
1492
      "\xd2\x81"         => "\xd2\x80",
1493
      "\xd1\xbf"         => "\xd1\xbe",
1494
      "\xd1\xbd"         => "\xd1\xbc",
1495
      "\xd1\xbb"         => "\xd1\xba",
1496
      "\xd1\xb9"         => "\xd1\xb8",
1497
      "\xd1\xb7"         => "\xd1\xb6",
1498
      "\xd1\xb5"         => "\xd1\xb4",
1499
      "\xd1\xb3"         => "\xd1\xb2",
1500
      "\xd1\xb1"         => "\xd1\xb0",
1501
      "\xd1\xaf"         => "\xd1\xae",
1502
      "\xd1\xad"         => "\xd1\xac",
1503
      "\xd1\xab"         => "\xd1\xaa",
1504
      "\xd1\xa9"         => "\xd1\xa8",
1505
      "\xd1\xa7"         => "\xd1\xa6",
1506
      "\xd1\xa5"         => "\xd1\xa4",
1507
      "\xd1\xa3"         => "\xd1\xa2",
1508
      "\xd1\xa1"         => "\xd1\xa0",
1509
      "\xd1\x9f"         => "\xd0\x8f",
1510
      "\xd1\x9e"         => "\xd0\x8e",
1511
      "\xd1\x9d"         => "\xd0\x8d",
1512
      "\xd1\x9c"         => "\xd0\x8c",
1513
      "\xd1\x9b"         => "\xd0\x8b",
1514
      "\xd1\x9a"         => "\xd0\x8a",
1515
      "\xd1\x99"         => "\xd0\x89",
1516
      "\xd1\x98"         => "\xd0\x88",
1517
      "\xd1\x97"         => "\xd0\x87",
1518
      "\xd1\x96"         => "\xd0\x86",
1519
      "\xd1\x95"         => "\xd0\x85",
1520
      "\xd1\x94"         => "\xd0\x84",
1521
      "\xd1\x93"         => "\xd0\x83",
1522
      "\xd1\x92"         => "\xd0\x82",
1523
      "\xd1\x91"         => "\xd0\x81",
1524
      "\xd1\x90"         => "\xd0\x80",
1525
      "\xd1\x8f"         => "\xd0\xaf",
1526
      "\xd1\x8e"         => "\xd0\xae",
1527
      "\xd1\x8d"         => "\xd0\xad",
1528
      "\xd1\x8c"         => "\xd0\xac",
1529
      "\xd1\x8b"         => "\xd0\xab",
1530
      "\xd1\x8a"         => "\xd0\xaa",
1531
      "\xd1\x89"         => "\xd0\xa9",
1532
      "\xd1\x88"         => "\xd0\xa8",
1533
      "\xd1\x87"         => "\xd0\xa7",
1534
      "\xd1\x86"         => "\xd0\xa6",
1535
      "\xd1\x85"         => "\xd0\xa5",
1536
      "\xd1\x84"         => "\xd0\xa4",
1537
      "\xd1\x83"         => "\xd0\xa3",
1538
      "\xd1\x82"         => "\xd0\xa2",
1539
      "\xd1\x81"         => "\xd0\xa1",
1540
      "\xd1\x80"         => "\xd0\xa0",
1541
      "\xd0\xbf"         => "\xd0\x9f",
1542
      "\xd0\xbe"         => "\xd0\x9e",
1543
      "\xd0\xbd"         => "\xd0\x9d",
1544
      "\xd0\xbc"         => "\xd0\x9c",
1545
      "\xd0\xbb"         => "\xd0\x9b",
1546
      "\xd0\xba"         => "\xd0\x9a",
1547
      "\xd0\xb9"         => "\xd0\x99",
1548
      "\xd0\xb8"         => "\xd0\x98",
1549
      "\xd0\xb7"         => "\xd0\x97",
1550
      "\xd0\xb6"         => "\xd0\x96",
1551
      "\xd0\xb5"         => "\xd0\x95",
1552
      "\xd0\xb4"         => "\xd0\x94",
1553
      "\xd0\xb3"         => "\xd0\x93",
1554
      "\xd0\xb2"         => "\xd0\x92",
1555
      "\xd0\xb1"         => "\xd0\x91",
1556
      "\xd0\xb0"         => "\xd0\x90",
1557
      "\xcf\xbb"         => "\xcf\xba",
1558
      "\xcf\xb8"         => "\xcf\xb7",
1559
      "\xcf\xb5"         => "\xce\x95",
1560
      "\xcf\xb2"         => "\xcf\xb9",
1561
      "\xcf\xb1"         => "\xce\xa1",
1562
      "\xcf\xb0"         => "\xce\x9a",
1563
      "\xcf\xaf"         => "\xcf\xae",
1564
      "\xcf\xad"         => "\xcf\xac",
1565
      "\xcf\xab"         => "\xcf\xaa",
1566
      "\xcf\xa9"         => "\xcf\xa8",
1567
      "\xcf\xa7"         => "\xcf\xa6",
1568
      "\xcf\xa5"         => "\xcf\xa4",
1569
      "\xcf\xa3"         => "\xcf\xa2",
1570
      "\xcf\xa1"         => "\xcf\xa0",
1571
      "\xcf\x9f"         => "\xcf\x9e",
1572
      "\xcf\x9d"         => "\xcf\x9c",
1573
      "\xcf\x9b"         => "\xcf\x9a",
1574
      "\xcf\x99"         => "\xcf\x98",
1575
      "\xcf\x97"         => "\xcf\x8f",
1576
      "\xcf\x96"         => "\xce\xa0",
1577
      "\xcf\x95"         => "\xce\xa6",
1578
      "\xcf\x91"         => "\xce\x98",
1579
      "\xcf\x90"         => "\xce\x92",
1580
      "\xcf\x8e"         => "\xce\x8f",
1581
      "\xcf\x8d"         => "\xce\x8e",
1582
      "\xcf\x8c"         => "\xce\x8c",
1583
      "\xcf\x8b"         => "\xce\xab",
1584
      "\xcf\x8a"         => "\xce\xaa",
1585
      "\xcf\x89"         => "\xce\xa9",
1586
      "\xcf\x88"         => "\xce\xa8",
1587
      "\xcf\x87"         => "\xce\xa7",
1588
      "\xcf\x86"         => "\xce\xa6",
1589
      "\xcf\x85"         => "\xce\xa5",
1590
      "\xcf\x84"         => "\xce\xa4",
1591
      "\xcf\x83"         => "\xce\xa3",
1592
      "\xcf\x82"         => "\xce\xa3",
1593
      "\xcf\x81"         => "\xce\xa1",
1594
      "\xcf\x80"         => "\xce\xa0",
1595
      "\xce\xbf"         => "\xce\x9f",
1596
      "\xce\xbe"         => "\xce\x9e",
1597
      "\xce\xbd"         => "\xce\x9d",
1598
      "\xce\xbc"         => "\xce\x9c",
1599
      "\xce\xbb"         => "\xce\x9b",
1600
      "\xce\xba"         => "\xce\x9a",
1601
      "\xce\xb9"         => "\xce\x99",
1602
      "\xce\xb8"         => "\xce\x98",
1603
      "\xce\xb7"         => "\xce\x97",
1604
      "\xce\xb6"         => "\xce\x96",
1605
      "\xce\xb5"         => "\xce\x95",
1606
      "\xce\xb4"         => "\xce\x94",
1607
      "\xce\xb3"         => "\xce\x93",
1608
      "\xce\xb2"         => "\xce\x92",
1609
      "\xce\xb1"         => "\xce\x91",
1610
      "\xce\xaf"         => "\xce\x8a",
1611
      "\xce\xae"         => "\xce\x89",
1612
      "\xce\xad"         => "\xce\x88",
1613
      "\xce\xac"         => "\xce\x86",
1614
      "\xcd\xbd"         => "\xcf\xbf",
1615
      "\xcd\xbc"         => "\xcf\xbe",
1616
      "\xcd\xbb"         => "\xcf\xbd",
1617
      "\xcd\xb7"         => "\xcd\xb6",
1618
      "\xcd\xb3"         => "\xcd\xb2",
1619
      "\xcd\xb1"         => "\xcd\xb0",
1620
      "\xca\x92"         => "\xc6\xb7",
1621
      "\xca\x8c"         => "\xc9\x85",
1622
      "\xca\x8b"         => "\xc6\xb2",
1623
      "\xca\x8a"         => "\xc6\xb1",
1624
      "\xca\x89"         => "\xc9\x84",
1625
      "\xca\x88"         => "\xc6\xae",
1626
      "\xca\x83"         => "\xc6\xa9",
1627
      "\xca\x80"         => "\xc6\xa6",
1628
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1629
      "\xc9\xb5"         => "\xc6\x9f",
1630
      "\xc9\xb2"         => "\xc6\x9d",
1631
      "\xc9\xb1"         => "\xe2\xb1\xae",
1632
      "\xc9\xaf"         => "\xc6\x9c",
1633
      "\xc9\xab"         => "\xe2\xb1\xa2",
1634
      "\xc9\xa9"         => "\xc6\x96",
1635
      "\xc9\xa8"         => "\xc6\x97",
1636
      "\xc9\xa5"         => "\xea\x9e\x8d",
1637
      "\xc9\xa3"         => "\xc6\x94",
1638
      "\xc9\xa0"         => "\xc6\x93",
1639
      "\xc9\x9b"         => "\xc6\x90",
1640
      "\xc9\x99"         => "\xc6\x8f",
1641
      "\xc9\x97"         => "\xc6\x8a",
1642
      "\xc9\x96"         => "\xc6\x89",
1643
      "\xc9\x94"         => "\xc6\x86",
1644
      "\xc9\x93"         => "\xc6\x81",
1645
      "\xc9\x92"         => "\xe2\xb1\xb0",
1646
      "\xc9\x91"         => "\xe2\xb1\xad",
1647
      "\xc9\x90"         => "\xe2\xb1\xaf",
1648
      "\xc9\x8f"         => "\xc9\x8e",
1649
      "\xc9\x8d"         => "\xc9\x8c",
1650
      "\xc9\x8b"         => "\xc9\x8a",
1651
      "\xc9\x89"         => "\xc9\x88",
1652
      "\xc9\x87"         => "\xc9\x86",
1653
      "\xc9\x82"         => "\xc9\x81",
1654
      "\xc9\x80"         => "\xe2\xb1\xbf",
1655
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1656
      "\xc8\xbc"         => "\xc8\xbb",
1657
      "\xc8\xb3"         => "\xc8\xb2",
1658
      "\xc8\xb1"         => "\xc8\xb0",
1659
      "\xc8\xaf"         => "\xc8\xae",
1660
      "\xc8\xad"         => "\xc8\xac",
1661
      "\xc8\xab"         => "\xc8\xaa",
1662
      "\xc8\xa9"         => "\xc8\xa8",
1663
      "\xc8\xa7"         => "\xc8\xa6",
1664
      "\xc8\xa5"         => "\xc8\xa4",
1665
      "\xc8\xa3"         => "\xc8\xa2",
1666
      "\xc8\x9f"         => "\xc8\x9e",
1667
      "\xc8\x9d"         => "\xc8\x9c",
1668
      "\xc8\x9b"         => "\xc8\x9a",
1669
      "\xc8\x99"         => "\xc8\x98",
1670
      "\xc8\x97"         => "\xc8\x96",
1671
      "\xc8\x95"         => "\xc8\x94",
1672
      "\xc8\x93"         => "\xc8\x92",
1673
      "\xc8\x91"         => "\xc8\x90",
1674
      "\xc8\x8f"         => "\xc8\x8e",
1675
      "\xc8\x8d"         => "\xc8\x8c",
1676
      "\xc8\x8b"         => "\xc8\x8a",
1677
      "\xc8\x89"         => "\xc8\x88",
1678
      "\xc8\x87"         => "\xc8\x86",
1679
      "\xc8\x85"         => "\xc8\x84",
1680
      "\xc8\x83"         => "\xc8\x82",
1681
      "\xc8\x81"         => "\xc8\x80",
1682
      "\xc7\xbf"         => "\xc7\xbe",
1683
      "\xc7\xbd"         => "\xc7\xbc",
1684
      "\xc7\xbb"         => "\xc7\xba",
1685
      "\xc7\xb9"         => "\xc7\xb8",
1686
      "\xc7\xb5"         => "\xc7\xb4",
1687
      "\xc7\xb3"         => "\xc7\xb2",
1688
      "\xc7\xaf"         => "\xc7\xae",
1689
      "\xc7\xad"         => "\xc7\xac",
1690
      "\xc7\xab"         => "\xc7\xaa",
1691
      "\xc7\xa9"         => "\xc7\xa8",
1692
      "\xc7\xa7"         => "\xc7\xa6",
1693
      "\xc7\xa5"         => "\xc7\xa4",
1694
      "\xc7\xa3"         => "\xc7\xa2",
1695
      "\xc7\xa1"         => "\xc7\xa0",
1696
      "\xc7\x9f"         => "\xc7\x9e",
1697
      "\xc7\x9d"         => "\xc6\x8e",
1698
      "\xc7\x9c"         => "\xc7\x9b",
1699
      "\xc7\x9a"         => "\xc7\x99",
1700
      "\xc7\x98"         => "\xc7\x97",
1701
      "\xc7\x96"         => "\xc7\x95",
1702
      "\xc7\x94"         => "\xc7\x93",
1703
      "\xc7\x92"         => "\xc7\x91",
1704
      "\xc7\x90"         => "\xc7\x8f",
1705
      "\xc7\x8e"         => "\xc7\x8d",
1706
      "\xc7\x8c"         => "\xc7\x8b",
1707
      "\xc7\x89"         => "\xc7\x88",
1708
      "\xc7\x86"         => "\xc7\x85",
1709
      "\xc6\xbf"         => "\xc7\xb7",
1710
      "\xc6\xbd"         => "\xc6\xbc",
1711
      "\xc6\xb9"         => "\xc6\xb8",
1712
      "\xc6\xb6"         => "\xc6\xb5",
1713
      "\xc6\xb4"         => "\xc6\xb3",
1714
      "\xc6\xb0"         => "\xc6\xaf",
1715
      "\xc6\xad"         => "\xc6\xac",
1716
      "\xc6\xa8"         => "\xc6\xa7",
1717
      "\xc6\xa5"         => "\xc6\xa4",
1718
      "\xc6\xa3"         => "\xc6\xa2",
1719
      "\xc6\xa1"         => "\xc6\xa0",
1720
      "\xc6\x9e"         => "\xc8\xa0",
1721
      "\xc6\x9a"         => "\xc8\xbd",
1722
      "\xc6\x99"         => "\xc6\x98",
1723
      "\xc6\x95"         => "\xc7\xb6",
1724
      "\xc6\x92"         => "\xc6\x91",
1725
      "\xc6\x8c"         => "\xc6\x8b",
1726
      "\xc6\x88"         => "\xc6\x87",
1727
      "\xc6\x85"         => "\xc6\x84",
1728
      "\xc6\x83"         => "\xc6\x82",
1729
      "\xc6\x80"         => "\xc9\x83",
1730
      "\xc5\xbf"         => "\x53",
1731
      "\xc5\xbe"         => "\xc5\xbd",
1732
      "\xc5\xbc"         => "\xc5\xbb",
1733
      "\xc5\xba"         => "\xc5\xb9",
1734
      "\xc5\xb7"         => "\xc5\xb6",
1735
      "\xc5\xb5"         => "\xc5\xb4",
1736
      "\xc5\xb3"         => "\xc5\xb2",
1737
      "\xc5\xb1"         => "\xc5\xb0",
1738
      "\xc5\xaf"         => "\xc5\xae",
1739
      "\xc5\xad"         => "\xc5\xac",
1740
      "\xc5\xab"         => "\xc5\xaa",
1741
      "\xc5\xa9"         => "\xc5\xa8",
1742
      "\xc5\xa7"         => "\xc5\xa6",
1743
      "\xc5\xa5"         => "\xc5\xa4",
1744
      "\xc5\xa3"         => "\xc5\xa2",
1745
      "\xc5\xa1"         => "\xc5\xa0",
1746
      "\xc5\x9f"         => "\xc5\x9e",
1747
      "\xc5\x9d"         => "\xc5\x9c",
1748
      "\xc5\x9b"         => "\xc5\x9a",
1749
      "\xc5\x99"         => "\xc5\x98",
1750
      "\xc5\x97"         => "\xc5\x96",
1751
      "\xc5\x95"         => "\xc5\x94",
1752
      "\xc5\x93"         => "\xc5\x92",
1753
      "\xc5\x91"         => "\xc5\x90",
1754
      "\xc5\x8f"         => "\xc5\x8e",
1755
      "\xc5\x8d"         => "\xc5\x8c",
1756
      "\xc5\x8b"         => "\xc5\x8a",
1757
      "\xc5\x88"         => "\xc5\x87",
1758
      "\xc5\x86"         => "\xc5\x85",
1759
      "\xc5\x84"         => "\xc5\x83",
1760
      "\xc5\x82"         => "\xc5\x81",
1761
      "\xc5\x80"         => "\xc4\xbf",
1762
      "\xc4\xbe"         => "\xc4\xbd",
1763
      "\xc4\xbc"         => "\xc4\xbb",
1764
      "\xc4\xba"         => "\xc4\xb9",
1765
      "\xc4\xb7"         => "\xc4\xb6",
1766
      "\xc4\xb5"         => "\xc4\xb4",
1767
      "\xc4\xb3"         => "\xc4\xb2",
1768
      "\xc4\xb1"         => "\x49",
1769
      "\xc4\xaf"         => "\xc4\xae",
1770
      "\xc4\xad"         => "\xc4\xac",
1771
      "\xc4\xab"         => "\xc4\xaa",
1772
      "\xc4\xa9"         => "\xc4\xa8",
1773
      "\xc4\xa7"         => "\xc4\xa6",
1774
      "\xc4\xa5"         => "\xc4\xa4",
1775
      "\xc4\xa3"         => "\xc4\xa2",
1776
      "\xc4\xa1"         => "\xc4\xa0",
1777
      "\xc4\x9f"         => "\xc4\x9e",
1778
      "\xc4\x9d"         => "\xc4\x9c",
1779
      "\xc4\x9b"         => "\xc4\x9a",
1780
      "\xc4\x99"         => "\xc4\x98",
1781
      "\xc4\x97"         => "\xc4\x96",
1782
      "\xc4\x95"         => "\xc4\x94",
1783
      "\xc4\x93"         => "\xc4\x92",
1784
      "\xc4\x91"         => "\xc4\x90",
1785
      "\xc4\x8f"         => "\xc4\x8e",
1786
      "\xc4\x8d"         => "\xc4\x8c",
1787
      "\xc4\x8b"         => "\xc4\x8a",
1788
      "\xc4\x89"         => "\xc4\x88",
1789
      "\xc4\x87"         => "\xc4\x86",
1790
      "\xc4\x85"         => "\xc4\x84",
1791
      "\xc4\x83"         => "\xc4\x82",
1792
      "\xc4\x81"         => "\xc4\x80",
1793
      "\xc3\xbf"         => "\xc5\xb8",
1794
      "\xc3\xbe"         => "\xc3\x9e",
1795
      "\xc3\xbd"         => "\xc3\x9d",
1796
      "\xc3\xbc"         => "\xc3\x9c",
1797
      "\xc3\xbb"         => "\xc3\x9b",
1798
      "\xc3\xba"         => "\xc3\x9a",
1799
      "\xc3\xb9"         => "\xc3\x99",
1800
      "\xc3\xb8"         => "\xc3\x98",
1801
      "\xc3\xb6"         => "\xc3\x96",
1802
      "\xc3\xb5"         => "\xc3\x95",
1803
      "\xc3\xb4"         => "\xc3\x94",
1804
      "\xc3\xb3"         => "\xc3\x93",
1805
      "\xc3\xb2"         => "\xc3\x92",
1806
      "\xc3\xb1"         => "\xc3\x91",
1807
      "\xc3\xb0"         => "\xc3\x90",
1808
      "\xc3\xaf"         => "\xc3\x8f",
1809
      "\xc3\xae"         => "\xc3\x8e",
1810
      "\xc3\xad"         => "\xc3\x8d",
1811
      "\xc3\xac"         => "\xc3\x8c",
1812
      "\xc3\xab"         => "\xc3\x8b",
1813
      "\xc3\xaa"         => "\xc3\x8a",
1814
      "\xc3\xa9"         => "\xc3\x89",
1815
      "\xc3\xa8"         => "\xc3\x88",
1816
      "\xc3\xa7"         => "\xc3\x87",
1817
      "\xc3\xa6"         => "\xc3\x86",
1818
      "\xc3\xa5"         => "\xc3\x85",
1819
      "\xc3\xa4"         => "\xc3\x84",
1820
      "\xc3\xa3"         => "\xc3\x83",
1821
      "\xc3\xa2"         => "\xc3\x82",
1822
      "\xc3\xa1"         => "\xc3\x81",
1823
      "\xc3\xa0"         => "\xc3\x80",
1824
      "\xc2\xb5"         => "\xce\x9c",
1825
      "\x7a"             => "\x5a",
1826
      "\x79"             => "\x59",
1827
      "\x78"             => "\x58",
1828
      "\x77"             => "\x57",
1829
      "\x76"             => "\x56",
1830
      "\x75"             => "\x55",
1831
      "\x74"             => "\x54",
1832
      "\x73"             => "\x53",
1833
      "\x72"             => "\x52",
1834
      "\x71"             => "\x51",
1835
      "\x70"             => "\x50",
1836
      "\x6f"             => "\x4f",
1837
      "\x6e"             => "\x4e",
1838
      "\x6d"             => "\x4d",
1839
      "\x6c"             => "\x4c",
1840
      "\x6b"             => "\x4b",
1841
      "\x6a"             => "\x4a",
1842
      "\x69"             => "\x49",
1843
      "\x68"             => "\x48",
1844
      "\x67"             => "\x47",
1845
      "\x66"             => "\x46",
1846
      "\x65"             => "\x45",
1847
      "\x64"             => "\x44",
1848
      "\x63"             => "\x43",
1849
      "\x62"             => "\x42",
1850
      "\x61"             => "\x41",
1851
1852
    );
1853
1854
    return $case;
1855
  }
1856
1857
  /**
   * Detects the UTF-8 related runtime features and stores them in self::$support.
   *
   * The "mbstring" entry doubles as the "already checked" marker: when it is set,
   * the call returns without probing again.
   */
  public static function checkForSupport()
  {
    // detection already ran on an earlier call
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1870 157
1871
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * @param    int $code_point The code point to encode. A value that is not strictly an integer
   *                           is first converted via self::hex_to_int().
   *
   * @return   string Multi-Byte character, or an empty string when the code point resolves to 0.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $number = (int)$code_point;

    // anything that was not a plain integer (e.g. a string) goes through the hex parser instead
    if ($number !== $code_point) {
      $number = (int)self::hex_to_int($code_point);
    }

    if ($number) {
      // let the entity decoder build the character from its numeric reference
      return self::html_entity_decode("&#{$number};", ENT_QUOTES);
    }

    return '';
  }
1894
1895
  /**
   * Runs a callback over every character of a string.
   *
   * @param    string $callback The callback, in any form array_map() accepts.
   * @param    string $str      UTF-8 string whose characters are passed to the callback.
   *
   * @return   array The callback results, one entry per element returned by self::split().
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1910
1911
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character; an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Test for emptiness by length, not by truthiness: the string "0" is falsy in PHP
    // but is still a one-character string that must be measured.
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1931
1932
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are combined; every following byte
   * contributes six more bits to the result.
   *
   * @param   string $chr The input character
   *
   * @return  int
   */
  public static function chr_to_decimal($chr)
  {
    $chr = (string)$chr;
    $code = self::ord($chr[0]);

    // 0xxxxxxx: high bit not set, the value is returned as-is
    if (($code & 0x80) === 0) {
      return $code;
    }

    // array(mask applied to the lead byte, expected marker, total number of bytes)
    $layouts = array(
        array(0xe0, 0xc0, 2), // 110xxxxx
        array(0xf0, 0xe0, 3), // 1110xxxx
        array(0xf8, 0xf0, 4), // 11110xxx
    );

    // stays 1 when the lead byte matches none of the layouts, so no further byte is read
    $length = 1;

    foreach ($layouts as $layout) {
      if (($code & $layout[0]) === $layout[1]) {
        $length = $layout[2];
        // strip the marker bits, keep the payload bits of the lead byte
        $code &= ~$layout[1];
        break;
      }
    }

    for ($pos = 1; $pos < $length; ++$pos) {
      // 10xxxxxx
      $code = ($code << 6) + (self::ord($chr[$pos]) & ~0x80);
    }

    return $code;
  }
1971
1972
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param    string $chr  The input character
   * @param    string $pfix Prefix handed to self::int_to_hex(), "U+" by default.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($chr, $pfix = 'U+')
  {
    $codePoint = self::ord($chr);

    return self::int_to_hex($codePoint, $pfix);
  }
1984
1985
  /**
   * Splits a string into smaller chunks and joins them with the specified
   * line ending character(s).
   *
   * The chunks are joined with implode(), so $end is placed between chunks
   * and not appended after the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1999
2000
  /**
   * Accepts a string and removes all non-UTF-8 characters from it.
   *
   * The optional steps run in a fixed order: whitespace normalization,
   * MS Word normalization, BOM removal. Each one only runs when its flag is exactly true.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to pass the result through self::removeBOM()
   * @param bool   $normalize_whitespace    set true, to pass the result through self::normalize_whitespace()
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // only group 1 (the well-formed runs) is written back, so bytes caught by group 2 or 3 vanish
    $cleaned = preg_replace($pattern, '$1', $str);

    $cleaned = self::remove_invisible_characters(
        self::replace_diamond_question_mark($cleaned, '')
    );

    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::removeBOM($cleaned);
    }

    return $cleaned;
  }
2046
2047
  /**
   * Cleans up a string so that only printable UTF-8 chars remain at the end.
   *
   * @param string $str
   *
   * @return string The cleaned string; an empty string when the input casts to an empty string.
   */
  public static function cleanup($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($input);

    // clean() is called with:
    //  - remove_bom = true              => remove BOM
    //  - normalize_whitespace = true    => normalize whitespace chars
    //  - normalize_msword = false
    //  - keep_non_breaking_space = true => ... but keep non-breaking-spaces
    // and it always removes non UTF-8 symbols, the diamond question mark (�)
    // and invisible characters (e.g. "\0")
    return (string)self::clean($fixed, true, true, false, true);
  }
2074
2075
  /**
   * Accepts a string and returns an array of Unicode code points.
   *
   * @param    mixed $arg     A UTF-8 encoded string or an array of such strings.
   *                          A string is split into characters first, an array is used as given.
   * @param    bool  $u_style If True, will return code points in U+xxxx format,
   *                          default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $points = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'ord',
        ),
        $chars
    );

    if (!$u_style) {
      return $points;
    }

    // second pass: turn every integer into its hex notation
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'int_to_hex',
        ),
        $points
    );
  }
2110
2111
  /**
   * Returns count of characters used in a string.
   *
   * Unlike the native count_chars() there is no $mode parameter.
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array of Character as keys and
   *           their count as values, sorted by key.
   */
  public static function count_chars($str) // there is no $mode parameters
  {
    $counts = array_count_values(self::split($str));

    // order the result by character (the array key)
    ksort($counts);

    return $counts;
  }
2127
2128
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    // build the hexadecimal numeric entity and let mbstring decode it
    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2145
2146
  /**
   * encode a string
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
   *                         otherwise the string is returned unchanged when the detected encoding
   *                         already equals the requested one
   *
   * @return string The converted string, or the input string when nothing was converted.
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // both the string and the target encoding must be non-empty
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // NOTE(review): a static analyzer reported $encodingDetected as false|string; the loose check
    // below therefore skips the conversion for false and also for an empty string.
    if (!$encodingDetected) {
      return $str;
    }

    // without "force" a string that already has the requested encoding is left alone
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    self::checkForSupport();

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_win1252($str);
    }

    // every other combination is delegated to mbstring
    $converted = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // a falsy conversion result falls back to the input string
    return $converted ? $converted : $str;
  }
2220
2221
  /**
   * Callback function for preg_replace_callback use.
   *
   * Decodes the matched HTML entity to UTF-8, except that a result consisting
   * of a single apostrophe is returned as the entity "&#x27;".
   *
   * @param  array $matches PREG matches; only the full match ($matches[0]) is used
   *
   * @return string
   */
  protected static function entityCallback($matches)
  {
    self::checkForSupport();

    $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    return $decoded === "'" ? '&#x27;' : $decoded;
  }
2240
2241
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string   $filename      <p>
   *                                Name of the file to read. The value is passed through
   *                                filter_var() with FILTER_SANITIZE_STRING before it is used.
   *                                </p>
   * @param int      $flags         [optional] <p>
   *                                Handed to the native file_get_contents() as its second argument
   *                                (use_include_path), converted to bool. FILE_USE_INCLUDE_PATH can be
   *                                used to trigger include path search; see include_path for more
   *                                information.
   *                                </p>
   *                                <p>
   *                                The manual text this description was taken from also lists FILE_TEXT
   *                                and FILE_BINARY (mutually exclusive, FILE_BINARY being the default)
   *                                as flags "as of PHP 6".
   *                                </p>
   * @param resource $context       [optional] <p>
   *                                A valid context resource created with
   *                                stream_context_create. If you don't need to use a
   *                                custom context, you can skip this parameter by &null;.
   *                                </p>
   * @param int      $offset        [optional] <p>
   *                                The offset where the reading starts; null is treated as 0.
   *                                </p>
   * @param int      $maxlen        [optional] <p>
   *                                Maximum length of data read. The default is to read until end
   *                                of file is reached. Only an integer value is passed on.
   *                                </p>
   * @param int      $timeout       Timeout for the "http" stream context that is created when no
   *                                $context is given; 0 disables the automatic context.
   *
   * @param boolean  $convertToUtf8 WARNING: maybe you can't use this option for images or pdf, because they used non
   *                                default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() reports values it cannot filter (e.g. arrays) with false;
    // there is no usable file name then, so fail like a failed read does
    if ($filename === false) {
      return false;
    }

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function declares these two parameters as bool and int. Casting the null
    // defaults here yields the same values the engine would coerce them to, without passing
    // null to a non-nullable internal parameter (deprecated since PHP 8.1).
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // convert without "force", then strip everything that is not clean UTF-8
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
2361 7
2362 7
  /**
   * Checks whether a file starts with a byte order mark (BOM).
   *
   * Only the first three bytes of the file are read and handed to UTF8::is_bom().
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has a BOM at the start, false otherwise
   *                (including when the file cannot be read).
   */
  public static function file_has_bom($file_path)
  {
    // Read from offset 0: since PHP 7.1 a negative offset counts from the END
    // of the file, so the previous "-1" inspected the last byte instead of the
    // leading bytes.
    $head = file_get_contents($file_path, false, null, 0, 3);

    // An unreadable file cannot be shown to carry a BOM.
    if ($head === false) {
      return false;
    }

    return self::is_bom($head);
  }
2373
2374
  /**
   * Recursively normalizes strings with the intl Normalizer.
   *
   * Arrays and objects are walked element by element / property by property;
   * strings are processed as described below; every other type is returned
   * unchanged.
   *
   * For strings: carriage returns are converted to "\n", and if the string
   * contains bytes >= 0x80 it is normalized to $normalization_form. When the
   * Normalizer yields nothing usable, the string is passed to
   * self::encode('UTF-8', ...) instead.
   *
   * NOTE(review): the default form "4" is presumably meant to be NFC; the numeric
   * values of the Normalizer constants depend on the intl version - confirm.
   *
   * @param mixed  $var                The value to filter.
   * @param int    $normalization_form Normalizer form constant.
   * @param string $leading_combining  Prefix put in front of strings that start with a combining mark.
   *
   * @return mixed The filtered value (same kind of value as the input).
   */
  public static function filter($var, $normalization_form = 4, $leading_combining = '◌')
  {
    $kind = gettype($var);

    if ($kind === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($kind === 'object') {
      foreach ($var as $key => $value) {
        $var->$key = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($kind !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure 7-bit strings need no normalization.
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty placeholder: keeps the isset() test below true for strings
      // that were already normalized.
      $normalized = '-';
    } else {
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // An unusable normalization result means the input is re-encoded instead.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars for NFC-safe concatenations.
    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($normalized[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
2426
2427
  /**
   * Wrapper around the native filter_input() whose result is passed through UTF8::filter().
   *
   * @param int    $type   One of the INPUT_* constants accepted by filter_input().
   * @param string $var    Name of the variable to fetch.
   * @param int    $filter The filter to apply.
   * @param mixed  $option Options / flags for the filter; forwarded only when explicitly passed.
   *
   * @return mixed The filtered value, as returned by UTF8::filter().
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only if the caller really supplied a fourth argument,
    // so the native function applies its own default otherwise.
    $raw = func_num_args() >= 4
        ? filter_input($type, $var, $filter, $option)
        : filter_input($type, $var, $filter);

    return self::filter($raw);
  }
2447
2448
  /**
   * Wrapper around the native filter_input_array() whose result is passed through UTF8::filter().
   *
   * @param int   $type       One of the INPUT_* constants accepted by filter_input_array().
   * @param mixed $definition Filter definition; forwarded only when explicitly passed.
   * @param bool  $add_empty  Forwarded together with $definition.
   *
   * @return mixed The filtered values, as returned by UTF8::filter().
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults are used; otherwise both
    // optional arguments are forwarded.
    $values = func_num_args() >= 2
        ? filter_input_array($type, $definition, $add_empty)
        : filter_input_array($type);

    return self::filter($values);
  }
2467
2468
  /**
   * Wrapper around the native filter_var() whose result is passed through UTF8::filter().
   *
   * @param mixed $var    The value to filter.
   * @param int   $filter The filter to apply.
   * @param mixed $option Options / flags for the filter; forwarded only when explicitly passed.
   *
   * @return mixed The filtered value, as returned by UTF8::filter().
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only if the caller really supplied a third argument.
    $raw = func_num_args() >= 3
        ? filter_var($var, $filter, $option)
        : filter_var($var, $filter);

    return self::filter($raw);
  }
2487
2488
  /**
   * Wrapper around the native filter_var_array() whose result is passed through UTF8::filter().
   *
   * @param array $data       The values to filter.
   * @param mixed $definition Filter definition; forwarded only when explicitly passed.
   * @param bool  $add_empty  Forwarded together with $definition.
   *
   * @return mixed The filtered values, as returned by UTF8::filter().
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults are used; otherwise both
    // optional arguments are forwarded.
    $values = func_num_args() >= 2
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    return self::filter($values);
  }
2507 1
2508 1
  /**
   * Checks that a string is no longer than the given number of characters.
   *
   * The length is measured with UTF8::strlen().
   *
   * @param    string $str      The string to be checked.
   * @param    int    $box_size The maximum number of characters allowed.
   *
   * @return   bool True if the string length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
2521
2522
  /**
   * Repairs a broken UTF-8 string by replacing every sequence listed in
   * self::$brokenUtf8ToUtf8 with its mapped value.
   *
   * @param string $str The string to repair (cast to string first).
   *
   * @return string The repaired string; '' for empty input.
   */
  public static function fix_simple_utf8($str)
  {
    // Search / replace lists are derived from the map once and then reused.
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$brokenUtf8ToUtf8);
      $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
2547
2548
  /**
   * Fixes a double (or multiple) encoded UTF-8 string.
   *
   * The value is run through utf8_decode() followed by to_utf8() repeatedly
   * until a round-trip no longer changes it. Arrays are processed element by
   * element (recursively).
   *
   * @param array|string $str The string, or array of strings, to fix.
   *
   * @return array|string The fixed value; an array is returned for array input.
   */
  public static function fix_utf8($str)
  {
    if (!is_array($str)) {
      $previous = '';

      // Stop as soon as another decode/encode pass yields the same value.
      while ($previous !== $str) {
        $previous = $str;
        $str = self::to_utf8(self::utf8_decode($str));
      }

      return $str;
    }

    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      $str[$key] = self::fix_utf8($value);
    }

    return $str;
  }
2575
2576
  /**
   * Gets the text direction of a specific character.
   *
   * The character is converted to its code point with chr_to_decimal() and
   * looked up in the tables of right-to-left code points below.
   *
   * @param   string $chr Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($chr)
  {
    $codePoint = static::chr_to_decimal($chr);

    // Nothing outside U+05BE..U+10B7F is listed as right-to-left.
    if (!(0x5be <= $codePoint && 0x10b7f >= $codePoint)) {
      return 'LTR';
    }

    if (0x85e >= $codePoint) {

      // Lower block: U+05BE..U+085E.
      $singles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $codePoint) {

      // U+200F is always right-to-left.
      return 'RTL';

    } elseif (0xfb1d <= $codePoint) {

      // Upper block: U+FB1D..U+10B7F.
      $singles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // Between the two blocks (and not U+200F): left-to-right.
      return 'LTR';

    }

    // Single code points are matched strictly (type and value).
    if (in_array($codePoint, $singles, true)) {
      return 'RTL';
    }

    // Ranges are inclusive on both ends.
    foreach ($ranges as $range) {
      if ($range[0] <= $codePoint && $range[1] >= $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2675 1
2676 1
  /**
   * Loads data from "/data/<file>.php" (relative to this class file).
   *
   * The file is pulled in with "require" and whatever it returns is handed back.
   *
   * @param string $file Base name of the data file, without directory and extension.
   *
   * @return bool|string|array|int The value returned by the data file, or false if the file does not exist.
   */
  protected static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2693
2694
  /**
   * Creates a random string of the given length.
   *
   * The alphabet is built once and cached: with PCRE UTF-8 support it is the
   * characters for code points 48..79 that are numbers or upper/lower-case
   * letters; otherwise it is 0-9, A-Z and a-z.
   *
   * The characters are picked with mt_rand(), so the result is NOT suitable
   * for security-sensitive purposes.
   *
   * @param    int $len The length of the string in characters (cast to int).
   *
   * @return   string String consisting of random characters; '' if $len is not positive.
   */
  public static function hash($len = 8)
  {
    static $chars = array();
    static $chars_len = null;

    // Work on an integer: with a fractional or non-numeric length a countdown
    // to "falsy" never terminates.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (!$chars) {
      if (self::$support['pcre_utf8'] === true) {
        $candidates = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // Blank out everything that is not a number or an upper/lower-case letter.
        $candidates = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $candidates);

        // Drop only the blanked-out entries. A bare array_filter() would also
        // discard the character "0", because PHP treats that string as falsy.
        $chars = array();
        foreach ((array)$candidates as $candidate) {
          if (isset($candidate[0])) {
            $chars[] = $candidate;
          }
        }
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for ($i = 0; $i < $len; ++$i) {
      $hash .= $chars[mt_rand(0, $chars_len - 1)];
    }

    return $hash;
  }
2741
2742
  /**
   * Converts a hexadecimal code point representation to an integer.
   *
   * Accepted forms (case-insensitive): four to six hexadecimal digits, optionally
   * prefixed with "U+" or with a backslash followed by "u".
   *
   * INFO: opposite to UTF8::int_to_hex( )
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // Capture hex digits only: intval() silently stops at the first invalid
    // character, so input such as "12zz" would otherwise be reported as 0x12
    // instead of as a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2759
2760
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * e.g.: &#123;&#39;&#1740;
   *
   * The string is split with UTF8::split(), every piece is passed to
   * UTF8::single_chr_html_encode() and the results are concatenated.
   *
   * @param  string $str The Unicode string to be encoded as numbered entities.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str)
  {
    $encoder = array(
        '\\voku\\helper\\UTF8',
        'single_chr_html_encode',
    );

    $characters = self::split($str);
    $entities = array_map($encoder, $characters);

    return implode('', $entities);
  }
2781
2782
  /**
   * UTF-8 version of html_entity_decode(): converts all HTML entities to their applicable characters.
   *
   * The native html_entity_decode() is not used by itself because it does not
   * convert entities that lack the trailing semicolon, although most browsers
   * still interpret them. Here a missing semicolon is appended to numeric
   * entities before decoding, and decoding is repeated until the string no
   * longer changes.
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      The input string.
   * @param int    $flags    [optional] A bitmask of one or more of the following flags, which specify how to
   *                         handle quotes and which document type to use:
   *                         - ENT_COMPAT:   convert double-quotes and leave single-quotes alone.
   *                         - ENT_QUOTES:   convert both double and single quotes.
   *                         - ENT_NOQUOTES: leave both double and single quotes unconverted.
   *                         - ENT_HTML401:  handle code as HTML 4.01.
   *                         - ENT_XML1:     handle code as XML 1.
   *                         - ENT_XHTML:    handle code as XHTML.
   *                         - ENT_HTML5:    handle code as HTML 5.
   *                         When null, ENT_COMPAT | ENT_HTML5 is used if Bootup::is_php('5.4') is true,
   *                         ENT_COMPAT otherwise.
   * @param string $encoding [optional] Encoding to use.
   *
   * @return string the decoded string.
   */
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Without an ampersand there is no entity to decode.
    if (strpos($str, '&') === false) {
      return $str;
    }

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    $entityCallback = array('\voku\helper\UTF8', 'entityCallback');

    do {
      $before = $str;

      // Decimal entities with two to five digits go through the class' own callback first.
      $str = preg_replace_callback("/&#\d{2,5};/", $entityCallback, $str);

      // Append the missing ";" to numeric (hex or decimal) entities ...
      $terminated = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      // ... and let the native function decode everything.
      $str = html_entity_decode($terminated, $flags, $encoding);

      // Another pass is needed while a pass still changes the string.
    } while ($before !== $str);

    return $str;
  }
2887
2888
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * This is a thin wrapper around the native htmlentities() whose $encoding defaults to 'UTF-8'.
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           The input string.
   * @param int    $flags         [optional] A bitmask of one or more of the following flags, which specify how to
   *                              handle quotes, invalid code unit sequences and the used document type:
   *                              - ENT_COMPAT:     convert double-quotes and leave single-quotes alone.
   *                              - ENT_QUOTES:     convert both double and single quotes.
   *                              - ENT_NOQUOTES:   leave both double and single quotes unconverted.
   *                              - ENT_IGNORE:     silently discard invalid code unit sequences instead of
   *                                                returning an empty string. Using this flag is discouraged
   *                                                as it may have security implications.
   *                              - ENT_SUBSTITUTE: replace invalid code unit sequences with a Unicode
   *                                                Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise)
   *                                                instead of returning an empty string.
   *                              - ENT_DISALLOWED: replace invalid code points for the given document type with
   *                                                a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
   *                                                (otherwise) instead of leaving them as is. This may be
   *                                                useful, for instance, to ensure the well-formedness of XML
   *                                                documents with embedded external content.
   *                              - ENT_HTML401:    handle code as HTML 4.01.
   *                              - ENT_XML1:       handle code as XML 1.
   *                              - ENT_XHTML:      handle code as XHTML.
   *                              - ENT_HTML5:      handle code as HTML 5.
   * @param string $encoding      [optional] The encoding used in the conversion. Although this argument is
   *                              technically optional, you are highly encouraged to specify the correct value
   *                              for your code.
   * @param bool   $double_encode [optional] When turned off, existing html entities are not encoded again.
   *                              The default is to convert everything.
   *
   * @return string The encoded string. If the input contains an invalid code unit sequence within the given
   *                encoding, an empty string is returned unless either the ENT_IGNORE or ENT_SUBSTITUTE
   *                flag is set.
   */
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    return $encoded;
  }
2995
2996
  /**
2997
   * Convert special characters to HTML entities: UTF-8 version of htmlspecialchars()
2998
   *
2999
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3000
   *
3001
   * @param string $str           <p>
3002
   *                              The string being converted.
3003
   *                              </p>
3004
   * @param int    $flags         [optional] <p>
3005
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3006
   *                              invalid code unit sequences and the used document type. The default is
3007
   *                              ENT_COMPAT | ENT_HTML401.
3008
   *                              <table>
3009
   *                              Available <i>flags</i> constants
3010
   *                              <tr valign="top">
3011
   *                              <td>Constant Name</td>
3012
   *                              <td>Description</td>
3013
   *                              </tr>
3014
   *                              <tr valign="top">
3015
   *                              <td><b>ENT_COMPAT</b></td>
3016
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3017
   *                              </tr>
3018
   *                              <tr valign="top">
3019
   *                              <td><b>ENT_QUOTES</b></td>
3020
   *                              <td>Will convert both double and single quotes.</td>
3021
   *                              </tr>
3022
   *                              <tr valign="top">
3023
   *                              <td><b>ENT_NOQUOTES</b></td>
3024
   *                              <td>Will leave both double and single quotes unconverted.</td>
3025
   *                              </tr>
3026
   *                              <tr valign="top">
3027
   *                              <td><b>ENT_IGNORE</b></td>
3028
   *                              <td>
3029
   *                              Silently discard invalid code unit sequences instead of returning
3030
   *                              an empty string. Using this flag is discouraged as it
3031
   *                              may have security implications.
3032
   *                              </td>
3033
   *                              </tr>
3034
   *                              <tr valign="top">
3035
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3036
   *                              <td>
3037
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3038
   *                              U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise) instead of returning an empty string.
3039
   *                              </td>
3040
   *                              </tr>
3041
   *                              <tr valign="top">
3042
   *                              <td><b>ENT_DISALLOWED</b></td>
3043
   *                              <td>
3044
   *                              Replace invalid code points for the given document type with a
3045
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &amp;#xFFFD;
3046
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3047
   *                              instance, to ensure the well-formedness of XML documents with
3048
   *                              embedded external content.
3049
   *                              </td>
3050
   *                              </tr>
3051
   *                              <tr valign="top">
3052
   *                              <td><b>ENT_HTML401</b></td>
3053
   *                              <td>
3054
   *                              Handle code as HTML 4.01.
3055
   *                              </td>
3056
   *                              </tr>
3057
   *                              <tr valign="top">
3058
   *                              <td><b>ENT_XML1</b></td>
3059
   *                              <td>
3060
   *                              Handle code as XML 1.
3061
   *                              </td>
3062 1
   *                              </tr>
3063
   *                              <tr valign="top">
3064 1
   *                              <td><b>ENT_XHTML</b></td>
3065
   *                              <td>
3066
   *                              Handle code as XHTML.
3067
   *                              </td>
3068
   *                              </tr>
3069
   *                              <tr valign="top">
3070
   *                              <td><b>ENT_HTML5</b></td>
3071
   *                              <td>
3072 1
   *                              Handle code as HTML 5.
3073
   *                              </td>
3074 1
   *                              </tr>
3075
   *                              </table>
3076
   *                              </p>
3077
   * @param string $encoding      [optional] <p>
3078
   *                              Defines encoding used in conversion.
3079
   *                              </p>
3080
   *                              <p>
3081
   *                              For the purposes of this function, the encodings
3082
   *                              ISO-8859-1, ISO-8859-15,
3083
   *                              UTF-8, cp866,
3084
   *                              cp1251, cp1252, and
3085
   *                              KOI8-R are effectively equivalent, provided the
3086
   *                              <i>string</i> itself is valid for the encoding, as
3087
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3088
   *                              the same positions in all of these encodings.
3089
   *                              </p>
3090
   * @param bool   $double_encode [optional] <p>
3091
   *                              When <i>double_encode</i> is turned off PHP will not
3092
   *                              encode existing html entities, the default is to convert everything.
3093
   *                              </p>
3094
   *
3095
   * @return string The converted string.
3096
   * </p>
3097
   * <p>
3098
   * If the input <i>string</i> contains an invalid code unit
3099
   * sequence within the given <i>encoding</i> an empty string
3100
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3101
   * <b>ENT_SUBSTITUTE</b> flags are set.
3102
   */
3103 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Thin wrapper around PHP's native htmlspecialchars(); the only difference
    // is that the encoding parameter defaults to UTF-8 here.
    $escaped = htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $escaped;
  }
3107
3108
  /**
   * Reports whether the "iconv" extension is loaded in the running PHP process.
   *
   * @return   bool True if the extension is loaded, false otherwise
   */
  public static function iconv_loaded()
  {
    $isLoaded = extension_loaded('iconv');

    if ($isLoaded) {
      return true;
    }

    return false;
  }
3117 1
3118
  /**
   * Formats an integer as a hexadecimal code point string such as "U+00e4".
   *
   * The hexadecimal part is lower-case (as produced by dechex()) and is
   * left-padded with zeros to at least four digits.
   *
   * @param    int    $int  The integer (or string of decimal digits) to convert.
   * @param    string $pfix Prefix placed in front of the hexadecimal digits.
   *
   * @return   string The prefixed code point, or an empty string when $int is
   *                  not made up solely of decimal digits.
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain run of decimal digits (negative numbers,
    // floats with a fraction, empty strings, ...) is rejected.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    $hexDigits = str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);

    return $pfix . $hexDigits;
  }
3138
3139
  /**
   * Reports whether the "intl" extension is loaded in the running PHP process.
   *
   * @return   bool True if the extension is loaded, false otherwise
   */
  public static function intl_loaded()
  {
    $isLoaded = extension_loaded('intl');

    if ($isLoaded) {
      return true;
    }

    return false;
  }
3148
3149
  /**
   * Alias of "UTF8::is_ascii()".
   *
   * @param string $str The string to check.
   *
   * @return boolean The result of UTF8::is_ascii() for the given string.
   */
  public static function isAscii($str)
  {
    $result = self::is_ascii($str);

    return $result;
  }
3160
3161
  /**
   * Alias of "UTF8::is_base64()".
   *
   * @param string $str The string to check.
   *
   * @return bool The result of UTF8::is_base64() for the given string.
   */
  public static function isBase64($str)
  {
    $result = self::is_base64($str);

    return $result;
  }
3172
3173
  /**
   * Alias of "UTF8::is_bom()".
   *
   * @param string $utf8_chr The string to compare against the byte order mark.
   *
   * @return boolean The result of UTF8::is_bom() for the given string.
   */
  public static function isBom($utf8_chr)
  {
    $result = self::is_bom($utf8_chr);

    return $result;
  }
3184
3185
  /**
   * Checks whether a string is a JSON document, i.e. a JSON object or a JSON array.
   *
   * The input is cast to string and decoded with json_decode(). Scalars that
   * PHP's decoder also accepts ("1", "true", "\"text\"", "null") are not
   * treated as JSON documents and yield false.
   *
   * @param string $str The string to check.
   *
   * @return bool True if $str decodes without error to an object or an array,
   *              false otherwise (including for the empty string).
   */
  public static function isJson($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = json_decode($str);

    // A top-level JSON array ("[1,2,3]") decodes to a PHP array, not an
    // object, so both container types have to be accepted.
    $isContainer = is_object($decoded) || is_array($decoded);

    return $isContainer && json_last_error() === JSON_ERROR_NONE;
  }
3210
3211
  /**
   * Checks whether a string contains at least one HTML-like tag.
   *
   * A tag is an opening, closing or self-closing element such as "<b>",
   * "</b>" or "<br />", optionally carrying attributes.
   *
   * @param string $str The string to inspect.
   *
   * @return boolean True if a tag was found, false otherwise (including for
   *                 the empty string).
   */
  public static function isHtml($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $tagPattern = "/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/";

    // preg_match() yields 1 only for a successful match; 0 (no match) and
    // false (PCRE error) both mean "no tag found".
    return preg_match($tagPattern, $str) === 1;
  }
3237 4
3238 3
  /**
   * Alias of "UTF8::is_utf8()".
   *
   * @param string $str The string to check.
   *
   * @return bool The result of UTF8::is_utf8() for the given string.
   */
  public static function isUtf8($str)
  {
    $result = self::is_utf8($str);

    return $result;
  }
3249
3250
  /**
   * Checks whether a string consists of 7-bit ASCII bytes only.
   *
   * @param    string $str The string to check.
   *
   * @return   bool <strong>true</strong> if no byte in the range 0x80-0xFF occurs<br />
   *                <strong>false</strong> otherwise
   */
  public static function is_ascii($str)
  {
    $highByteFound = preg_match('/[\x80-\xFF]/', $str);

    // Only an actual match (1) means "not ASCII"; 0 and false both count as ASCII.
    return $highByteFound !== 1;
  }
3262
3263
  /**
   * Checks whether a string is base64 encoded.
   *
   * The string is decoded in strict mode and encoded again; it counts as
   * base64 only if that round trip reproduces the input exactly.
   *
   * @param string $str The string to check.
   *
   * @return bool True if $str is base64 encoded, false otherwise (including
   *              for the empty string).
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
3284
3285 2
  /**
   * Heuristically checks whether the input looks like binary data.
   *
   * The input is considered binary when
   * - it consists solely of the characters "0" and "1" (a textual bit string), or
   * - it contains at least one NUL byte.
   *
   * @param string $input The data to inspect.
   *
   * @return bool True if the input looks binary, false otherwise (including
   *              for the empty string).
   */
  public static function is_binary($input)
  {
    $input = (string)$input;

    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // The former third heuristic counted the literal text "^ -~" and could
    // never exceed its 0.3 threshold (a 4-byte needle covers at most a
    // quarter of the string), so it has been removed as dead code.
    return substr_count($input, "\x00") > 0;
  }
3309 2
3310 2
  /**
   * Checks whether a file looks binary by inspecting its first 512 bytes
   * with UTF8::is_binary().
   *
   * A file that cannot be opened or read is treated like an empty file.
   *
   * @param string $file Path of the file to inspect.
   *
   * @return boolean The result of UTF8::is_binary() for the leading block.
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() reports failure by returning false, not by throwing, so the
      // handle must be checked before it is handed to fread()/fclose().
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        fclose($fp);
        $fp = false;

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Only reachable when an error handler turns warnings into exceptions.
      // Do not leak the handle in that case.
      if (is_resource($fp)) {
        fclose($fp);
      }
      $block = '';
    }

    return self::is_binary($block);
  }
3329
3330 2
  /**
   * Checks whether the given string is exactly the value returned by UTF8::bom().
   *
   * WARNING: Use "UTF8::string_has_bom()" if you want to check for a BOM inside a string.
   *
   * @param    string $utf8_chr The input string.
   *
   * @return   bool True if $utf8_chr is identical to the byte order mark, false otherwise.
   */
  public static function is_bom($utf8_chr)
  {
    $bom = self::bom();

    return $bom === $utf8_chr;
  }
3343 2
3344
  /**
   * Check if the string is UTF-16.
   *
   * Only inputs that UTF8::is_binary() classifies as binary are examined.
   * Both byte orders are scored and the higher score wins.
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-16 (or both byte orders score
   *                   equally), 1 for UTF-16LE, 2 for UTF-16BE.
   */
  public static function is_utf16($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    $maybeUTF16LE = self::score_round_trip_for_encoding($str, 'UTF-16LE');
    $maybeUTF16BE = self::score_round_trip_for_encoding($str, 'UTF-16BE');

    if ($maybeUTF16BE !== $maybeUTF16LE) {
      return $maybeUTF16LE > $maybeUTF16BE ? 1 : 2;
    }

    return false;
  }

  /**
   * Check if the string is UTF-32.
   *
   * Only inputs that UTF8::is_binary() classifies as binary are examined.
   * Both byte orders are scored and the higher score wins.
   *
   * @param string $str
   *
   * @return int|false false if it is not UTF-32 (or both byte orders score
   *                   equally), 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    $maybeUTF32LE = self::score_round_trip_for_encoding($str, 'UTF-32LE');
    $maybeUTF32BE = self::score_round_trip_for_encoding($str, 'UTF-32BE');

    if ($maybeUTF32BE !== $maybeUTF32LE) {
      return $maybeUTF32LE > $maybeUTF32BE ? 1 : 2;
    }

    return false;
  }

  /**
   * Scores how well $str survives a conversion round trip through $encoding.
   *
   * Shared implementation for is_utf16() and is_utf32().
   *
   * @param string $str      The raw input.
   * @param string $encoding The candidate encoding, e.g. "UTF-16LE".
   *
   * @return int 0 when the conversion fails, yields at most one byte or does
   *             not round-trip; otherwise the number of matching entries (see
   *             the note in the loop).
   */
  private static function score_round_trip_for_encoding($str, $encoding)
  {
    $score = 0;

    $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if ($test === false || strlen($test) <= 1) {
      return $score;
    }

    // Convert back and forth once more; a stable result is required.
    $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
    $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
    if ($test3 !== $test) {
      return $score;
    }

    $strChars = self::count_chars($str);
    foreach (self::count_chars($test3) as $test3char => $test3charEmpty) {
      // NOTE(review): this compares the *keys* of count_chars($test3) with
      // the *values* of count_chars($str). If count_chars() returns
      // "char => count", a key lookup was presumably intended. Behaviour is
      // kept as-is; confirm against count_chars() before changing it.
      if (in_array($test3char, $strChars, true) === true) {
        $score++;
      }
    }

    return $score;
  }
3453
3454
  /**
   * Checks whether the passed string consists only of well-formed UTF-8 byte sequences.
   *
   * The check is a pure-PHP state machine, so the result does not depend on
   * how PCRE was built. Rejected are: stray continuation octets, invalid lead
   * octets (including the obsolete 5- and 6-octet forms), non-shortest
   * ("overlong") encodings, surrogate code points (U+D800..U+DFFF), code
   * points above U+10FFFF, and sequences that are cut off, including at the
   * very end of the string.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param    string $str The string to be checked.
   *
   * @return   bool True if $str is valid UTF-8 (the empty string is valid),
   *                false otherwise.
   */
  public static function is_utf8($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    $mState = 0; // number of continuation octets still expected
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Expect either a US-ASCII octet or the lead octet of a sequence.
        if (0 === (0x80 & $in)) {
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // Lead octet of a 2-octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // Lead octet of a 3-octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // Lead octet of a 4-octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } else {
          // Stray continuation octet, 5-/6-octet lead (always illegal since
          // such sequences are overlong or beyond U+10FFFF), or 0xFE/0xFF.
          return false;
        }

        continue;
      }

      // Inside a sequence only continuation octets (10xxxxxx) are legal.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $mUcs4 |= ($in & 0x3F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // Sequence complete: $mUcs4 holds the decoded code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            // From Unicode 3.2, surrogate characters are illegal.
            ($mUcs4 >= 0xD800 && $mUcs4 <= 0xDFFF) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state here means the string ended in the middle of a
    // multi-octet sequence, which is not valid UTF-8.
    return $mState === 0;
  }
3587
3588
  /**
   * Decodes a JSON string after passing it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded. PHP's decoder
   *                        only works with UTF-8 encoded strings and also
   *                        accepts top-level scalars and <b>NULL</b>.
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted
   *                        into associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options, e.g.
   *                        <b>JSON_BIGINT_AS_STRING</b>. Only forwarded when
   *                        Bootup::is_php('5.4') returns true.
   *                        </p>
   *
   * @return mixed the value encoded in <i>json</i> in the appropriate PHP type;
   * <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the
   * encoded data is deeper than the recursion limit.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    // The fourth argument is only passed when the version check succeeds.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3636
3637
  /**
   * Returns the JSON representation of a value after passing it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource. PHP's encoder requires all string data to
   *                       be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero. Only
   *                       forwarded when Bootup::is_php('5.5') is truthy.
   *                       </p>
   *
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // The third argument is only passed when the version check succeeds.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3685 1
3686
  /**
   * Lower-cases the first character of a string.
   *
   * @param    string $str The input string
   *
   * @return   string The string with its first character passed through
   *                  UTF8::strtolower() and the remainder left untouched
   */
  public static function lcfirst($str)
  {
    $head = self::strtolower(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
3697
3698 2
  /**
   * Strips whitespace (or other characters) from the beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than PHP's native "ltrim()"!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when left at
   *                         the INF default, the regex class "\s" is used
   *
   * @return   string The string with the unwanted characters stripped from
   *                  the left; an empty string for empty input
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if (INF === $chars) {
      $charClass = '\s';
    } else {
      // Custom character lists are turned into a regex class by rxClass().
      $charClass = self::rxClass($chars);
    }

    return preg_replace('/^' . $charClass . '+/u', '', $str);
  }
3720
3721
  /**
   * Returns the UTF-8 character with the highest code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *                      (an array is concatenated first).
   *
   * @return   string The character with the highest code point.
   */
  public static function max($arg)
  {
    $haystack = is_array($arg) ? implode($arg) : $arg;

    $codePoints = self::codepoints($haystack);

    return self::chr(max($codePoints));
  }
3736 13
3737 13
  /**
   * Calculates the maximum number of bytes taken by any single
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest entry of UTF8::chr_size_list($str), or 0 when
   *               that list is empty.
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    if (count($sizes) === 0) {
      return 0;
    }

    return (int)max($sizes);
  }
3754 13
3755
  /**
   * Reports whether the "mbstring" extension is loaded.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding
   * is switched to UTF-8.
   *
   * @return   bool True if the extension is loaded, false otherwise
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3770 1
3771 1
  /**
   * Returns the UTF-8 character with the lowest code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *                      (an array is concatenated first).
   *
   * @return   string The character with the lowest code point.
   */
  public static function min($arg)
  {
    $haystack = is_array($arg) ? implode($arg) : $arg;

    $codePoints = self::codepoints($haystack);

    return self::chr(min($codePoints));
  }
3786 8
3787
  /**
   * Normalizes an encoding name.
   *
   * The name is upper-cased; if, after removing everything except letters,
   * digits and whitespace, it matches one of the known aliases, the mapped
   * name is returned instead. Results are cached per input for the lifetime
   * of the process.
   *
   * @param string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string e.g.: ISO-8859-1, UTF-8, ISO-8859-5 etc.; falsy input is
   *                returned unchanged.
   */
  public static function normalizeEncoding($encoding)
  {
    static $resolved = array();

    if (isset($resolved[$encoding])) {
      return $resolved[$encoding];
    }

    // Falsy input ('', '0', null, false, ...) is handed back untouched.
    if (!$encoding) {
      return $encoding;
    }

    $cacheKey = $encoding;

    $encoding = (string)$encoding;
    if (!isset($encoding[0])) {
      return '';
    }

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'WINDOWS1251' => 'ISO-8859-5',
    );

    $upperCased = strtoupper($encoding);

    // Lookup key: the upper-cased name without dashes, dots, underscores etc.
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    $normalized = isset($aliases[$aliasKey]) ? $aliases[$aliasKey] : $upperCased;

    $resolved[$cacheKey] = $normalized;

    return $normalized;
  }
3841
3842 1
  /**
3843
   * Normalize MS Word special characters.
3844 1
   *
3845
   * @param string $str The string to be normalized.
3846
   *
3847
   * @return string
3848
   */
3849
  public static function normalize_msword($str)
  {
    // search and replacement lists, split out of self::$utf8MSWord on first use and reused afterwards
    static $needles = null;
    static $substitutes = null;

    if (null === $needles) {
      $needles = array_keys(self::$utf8MSWord);
      $substitutes = array_values(self::$utf8MSWord);
    }

    // every key of the table is replaced by its value
    $normalized = str_replace($needles, $substitutes, $str);

    return $normalized;
  }
3861 14
3862 14
  /**
3863
   * Normalize the whitespace.
3864 14
   *
3865 2
   * @param string $str                     The string to be normalized.
3866
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
3867
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
3868 13
   *
3869 7
   * @return string
3870
   */
3871
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // prepared search lists: slot 0 = full whitespace table, slot 1 = table without the NBSP
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // Only a strict boolean true keeps the non-breaking space. The cache slot has to follow
    // exactly the same rule: with an (int)-cast as key, a truthy non-bool such as 1 filled
    // slot 1 with a list that still contained the NBSP and thereby broke every later call
    // that passed a real true.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($whitespaces[$cacheKey])) {
      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        unset($table['NO-BREAK SPACE']);
      }

      // str_replace() only needs the characters, not their names
      $whitespaces[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      // bidi control characters are removed, not replaced by a space
      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    // every remaining entry of the whitespace list becomes a plain space
    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
3900 1
3901
  /**
3902 1
   * Format a number with grouped thousands.
3903
   *
3904 1
   * @param float  $number
3905 1
   * @param int    $decimals
3906
   * @param string $dec_point
3907
   * @param string $thousands_sep
3908
   *
3909
   * @return string
3910
   */
3911
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // A separator longer than one byte (e.g. a UTF-8 no-break space) is not handled by the
    // native function on every PHP version. As soon as EITHER separator is multi-byte, format
    // with the ASCII defaults and swap the separators in afterwards; this gives the same
    // result on all PHP versions, so no version check is needed.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() with an array replaces both markers in one pass; two sequential replacements
      // would touch the freshly inserted separator again if it contains "." or ","
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    // single-byte (or empty) separators: the native function is sufficient
    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3936
3937 1
  /**
3938
   * Calculates Unicode code point of the given UTF-8 encoded character.
3939
   *
3940 1
   * @param    string $s The character of which to calculate code point.
3941
   *
3942
   * @return   int Unicode code point of the given character,<br />
3943
   *           0 on invalid UTF-8 byte sequence.
3944 1
   */
3945
  public static function ord($s)
  {
    // every falsy input except the literal string "0" has no code point
    if ($s !== '0' && !$s) {
      return 0;
    }

    // only the first four bytes are looked at; unpack('C*') numbers them starting with 1
    $bytes = unpack('C*', substr($s, 0, 4));
    if (!$bytes) {
      return 0;
    }

    $lead = $bytes[1];

    // The lead byte decides how many bytes are combined. The following bytes are only
    // checked for existence, their bit pattern is not validated.

    // lead byte >= 0xF0 and four bytes available
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // lead byte >= 0xE0 and three bytes available
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // lead byte >= 0xC0 and two bytes available
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // anything else: the value of the first byte itself
    return $lead;
  }
3968
3969
  /**
3970
   * Parses the string into variables.
3971
   *
3972
   * WARNING: This differs from parse_str() by returning the results
3973
   *    instead of placing them in the local scope!
3974
   *
3975 7
   * @link http://php.net/manual/en/function.parse-str.php
3976
   *
3977 7
   * @param string $str     <p>
3978
   *                        The input string.
3979
   *                        </p>
3980 7
   * @param array  $result  <p>
3981 2
   *                        If the second parameter arr is present,
3982 2
   *                        variables are stored in this variable as array elements instead.
3983 7
   *                        </p>
3984
   *
3985 7
   * @return void
3986
   */
3987
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    // the input goes through self::filter() before mbstring parses it;
    // the parsed variables are written into $result (by reference), nothing is returned
    \mb_parse_str(self::filter($str), $result);
  }
3996 1
3997 3
  /**
3998
   * Checks whether the /u pattern modifier, which enables Unicode (UTF-8) support in PCRE, is available.
3999 7
   *
4000
   * @return   bool True if support is available, false otherwise
4001
   */
4002 3
  public static function pcre_utf8_support()
  {
    // Probe: an empty pattern with the "u" modifier against an empty subject.
    // preg_match() answers 1 on a match and false if the pattern cannot be used;
    // the warning raised in the latter case is silenced on purpose.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
4007
4008 3
  /**
4009 1
   * Create an array containing a range of UTF-8 characters.
4010 1
   *
4011 3
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
4012
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
4013 7
   *
4014
   * @return   array
4015
   */
4016
  public static function range($var1, $var2)
  {
    // falsy boundaries (0, '0', '', null, ...) give an empty range
    if (!$var1 || !$var2) {
      return array();
    }

    $start = self::rangeBoundaryToCodePoint($var1);
    if (!$start) {
      return array();
    }

    $end = self::rangeBoundaryToCodePoint($var2);
    if (!$end) {
      return array();
    }

    // native range() over the code points, each one mapped through self::chr()
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Resolve one boundary of range() to an integer code point.
   *
   * Checked in this order: decimal digits only -> integer value,
   * hexadecimal digits only -> converted via self::hex_to_int(),
   * anything else -> self::ord() of the value.
   *
   * Note: a value made of hex letters only (e.g. "a" or "ff") is therefore read as a hex number.
   *
   * @param mixed $boundary Numeric or hexadecimal code point, or a UTF-8 character.
   *
   * @return int The resolved code point; a falsy value means "not usable" for the caller.
   */
  private static function rangeBoundaryToCodePoint($boundary)
  {
    // ctype_*() expects strings; other types trigger deprecation notices on newer PHP versions
    $boundaryAsString = (string)$boundary;

    if (ctype_digit($boundaryAsString)) {
      return (int)$boundary;
    }

    if (ctype_xdigit($boundaryAsString)) {
      return (int)self::hex_to_int($boundary);
    }

    return self::ord($boundary);
  }
4054
4055 36
  /**
4056
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
4057
   *
4058
   * @param string $str
4059 36
   *
4060 36
   * @return string
4061 36
   */
4062 36
  public static function removeBOM($str = '')
  {
    // INFO: https://en.wikipedia.org/wiki/Byte_order_mark
    //
    // All needles are written as byte escapes so they survive any re-encoding of this file.
    // (A needle that degrades to an empty string matches at offset 0 on PHP 8 and would cut
    // the first bytes off every input.)
    //
    // The order matters: the first match wins, and the UTF-32 (LE) mark starts with the
    // bytes of the UTF-16 (LE) mark, so the longer one has to be tested first.
    static $byteOrderMarks = array(
        "\xef\xbb\xbf",             // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf", // UTF-8 BOM as "WINDOWS-1252" (the three BOM bytes read as single chars and re-encoded)
        "\x00\x00\xfe\xff",         // UTF-32 (BE) BOM
        "\xff\xfe\x00\x00",         // UTF-32 (LE) BOM
        "\xfe\xff",                 // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf",         // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe",                 // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe",         // UTF-16 (LE) BOM as "WINDOWS-1252"
    );

    foreach ($byteOrderMarks as $bom) {
      if (0 === strpos($str, $bom)) {
        // the cast turns the "false" that old PHP versions return for a BOM-only input into ''
        return (string)substr($str, strlen($bom));
      }
    }

    // no BOM found: the input is handed back as it is
    return $str;
  }
4086 36
4087 36
  /**
4088
   * Removes duplicate occurrences of a string in another string.
4089 36
   *
4090 36
   * @param    string       $str  The base string
4091 36
   * @param    string|array $what String to search for in the base string
4092
   *
4093 36
   * @return   string The result string with removed duplicates
4094
   */
4095
  public static function remove_duplicates($str, $what = ' ')
  {
    // a single needle is handled like a list with one entry
    if (is_string($what)) {
      $what = array($what);
    }

    // anything that is neither a string nor an array leaves $str untouched
    if (is_array($what)) {
      foreach ($what as $item) {
        // one or more consecutive occurrences of the needle ...
        $pattern = '/(' . preg_quote($item, '/') . ')+/';

        // ... collapse into exactly one. "\" and "$" have a special meaning inside a preg
        // replacement (backreferences such as "$0" or "\1"), so they are escaped in order
        // to insert the needle literally.
        $replacement = addcslashes($item, '\\$');

        $str = preg_replace($pattern, $replacement, $str);
      }
    }

    return $str;
  }
4109
4110 23
  /**
4111 5
   * Remove Invisible Characters
4112
   *
4113
   * This prevents sandwiching null characters
4114 19
   * between ascii characters, like Java\0script.
4115
   *
4116 19
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4117
   *
4118
   * @param  string $str
4119
   * @param  bool   $url_encoded
4120
   *
4121
   * @return  string
4122
   */
4123
  public static function remove_invisible_characters($str, $url_encoded = true)
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // "/i": the hex digits of a percent-encoding may be written in either case,
      // so "%0B" has to be removed just like "%0b"
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Removing one sequence can glue a new one together (e.g. "%0%0bb" -> "%0b"),
    // therefore the replacement is repeated until a pass changes nothing.
    do {
      $str = preg_replace($non_displayables, '', $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4143 14
4144 15
  /**
4145 1
   * replace diamond question mark (�)
4146 1
   *
4147
   * @param string $str
4148
   * @param string $unknown
4149 16
   *
4150
   * @return string
4151 16
   */
4152
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the diamond question mark, once as escaped bytes and once as the literal character
    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // both needles are replaced by the very same string
    $substitutes = array_fill(0, 2, $unknown);

    return str_replace($needles, $substitutes, $str);
  }
4166
4167
  /**
4168
   * Strip whitespace or other characters from end of a UTF-8 string.
4169
   *
4170
   * WARNING: This is much slower than "rtrim()" !!!!
4171
   *
4172
   * @param    string $str   The string to be trimmed
4173
   * @param    string $chars Optional characters to be stripped
4174
   *
4175
   * @return   string The string with unwanted characters stripped from the right
4176
   */
4177 View Code Duplication
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INF is the marker for "no character list given": strip whitespace.
    // Otherwise self::rxClass() turns the list into a regex fragment.
    $chars = INF === $chars ? '\s' : self::rxClass($chars);

    // "\z" matches at the very end of the subject only. A plain "$" also matches in front
    // of a trailing newline, so rtrim("abc\n", 'c') removed a "c" that is not at the end.
    return preg_replace("/{$chars}+\\z/u", '', $str);
  }
4189
4190
  /**
4191
   * rxClass
4192
   *
4193
   * @param string $s
4194
   * @param string $class
4195
   *
4196
   * @return string
4197
   */
4198
  protected static function rxClass($s, $class = '')
  {
    // finished fragments, keyed by the concatenation of both arguments
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // slot 0 collects everything that goes into one "[...]" character class,
    // every further slot is a unit that has to become an alternative of its own
    $parts = array($class);

    foreach (self::str_split($s) as $unit) {
      if ('-' === $unit) {
        // a hyphen is moved to the front of the class
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($unit[2])) {
        // at most two bytes long: quote it for use inside the pattern
        $parts[0] .= preg_quote($unit, '/');
        continue;
      }

      if (1 === self::strlen($unit)) {
        // three or more bytes, but a length of 1 according to self::strlen(): appended as it is
        $parts[0] .= $unit;
        continue;
      }

      // longer unit: cannot live inside a character class
      $parts[] = $unit;
    }

    $parts[0] = '[' . $parts[0] . ']';

    // a lone character class is returned directly, otherwise a non-capturing alternation is built
    $return = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
4235
4236
  /**
4237
   * Echo native UTF8-Support libs, e.g. for debugging.
4238
   */
4239
  public static function showSupport()
  {
    // every entry of self::$support is printed, each followed by a newline and a HTML line break
    foreach (self::$support as $supportEntry) {
      echo $supportEntry . "\n<br>";
    }
  }
4245
4246
  /**
4247
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4248
   *
4249
   * @param    string $chr The Unicode character to be encoded as numbered entity.
4250
   *
4251
   * @return   string The HTML numbered entity.
4252
   */
4253
  public static function single_chr_html_encode($chr)
  {
    // "0" is falsy in PHP but a perfectly valid character (-> "&#48;"), so it must not be
    // treated like empty input; this is the same guard self::ord() uses
    if (!$chr && $chr !== '0') {
      return '';
    }

    // decimal numbered entity built from the code point
    return '&#' . self::ord($chr) . ';';
  }
4261
4262 24
  /**
4263
   * Convert a string to an array of Unicode characters.
4264
   *
4265
   * @param    string  $str       The string to split into array.
4266 24
   * @param    int     $length    Max character length of each array element.
4267
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string.
4268
   *
4269
   * @return   array An array containing chunks of the string.
4270
   */
4271
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    self::checkForSupport();
    $ret = array();

    if (self::$support['pcre_utf8'] === true) {

      // the clean-up is only applied on this code path
      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // "u" + "s": every single character, newlines included, is one match
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }

    } else {

      // fallback: walk through the bytes and group them by the bit pattern of the lead byte

      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        // highest bit not set: single byte character
        if (($lead & "\x80") === "\x00") {
          $ret[] = $lead;
          continue;
        }

        // 110xxxxx + one following byte of the form 10xxxxxx
        if ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $ret[] = $lead . $str[$pos + 1];

            $pos++;
          }
          continue;
        }

        // 1110xxxx + two following bytes of the form 10xxxxxx
        if ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $ret[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
          continue;
        }

        // 11110xxx + three following bytes of the form 10xxxxxx
        if ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $ret[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }

        // bytes that fit none of the patterns are skipped
      }
    }

    // group the characters into chunks of $length and glue every chunk together again
    if ($length > 1) {
      $ret = array_map('implode', array_chunk($ret, $length));
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4340
4341
  /**
4342
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4343
   *
4344
   * @param string $str
4345
   *
4346
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
4347
   *                      otherwise it will return false.
4348
   */
4349
  public static function str_detect_encoding($str)
  {

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each detector is run once and its verdict reused, instead of scanning the
      // string again for every comparison (1 => little endian, 2 => big endian)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
        'UTF-8',
    );

    self::checkForSupport();

    // third argument "true": strict detection
    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the string from that encoding into
    // itself gives back the identical bytes (compared via md5).

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@iconv($encodingTmp, $encodingTmp, $str)) === $md5) {
        return $encodingTmp;
      }
    }

    // nothing matched
    return false;
  }
4418
4419
  /**
4420
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
4421 2
   *
4422
   * @link  http://php.net/manual/en/function.str-ireplace.php
4423 2
   *
4424
   * @param mixed $search  <p>
4425 2
   *                       Every replacement with search array is
4426 2
   *                       performed on the result of previous replacement.
4427
   *                       </p>
4428 2
   * @param mixed $replace <p>
4429
   *                       </p>
4430
   * @param mixed $subject <p>
4431 2
   *                       If subject is an array, then the search and
4432 2
   *                       replace is performed with every entry of
4433 2
   *                       subject, and the return value is an array as
4434 2
   *                       well.
4435 2
   *                       </p>
4436
   * @param int   $count   [optional] <p>
4437 2
   *                       The number of matched and replaced needles will
4438 2
   *                       be returned in count which is passed by
4439 2
   *                       reference.
4440 2
   *                       </p>
4441 2
   *
4442 2
   * @return mixed a string or an array of replacements.
4443
   * @since 5.0
4444 2
   */
4445 2
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // every needle becomes a case-insensitive ("i"), UTF-8 aware ("u") pattern
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // an empty needle must not match anything: start of subject with a
        // character in front of it can never be satisfied
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // "\" and "$" introduce backreferences in a preg replacement ("$0", "\1", ...). The
    // replacement of str_ireplace() is meant literally, so both characters are escaped.
    if (is_array($replace)) {
      $replacements = array();
      foreach ($replace as $key => $value) {
        $replacements[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacements = addcslashes((string)$replace, '\\$');
    }

    // a dedicated variable receives the number of replacements
    // NOTE: preg_replace() gives null on failure, e.g. for a subject that is not valid UTF-8
    $subject = preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
4464
  /**
4465
   * Limit the number of characters in a string, but also after the next word.
4466
   *
4467
   * @param  string $str
4468
   * @param  int    $length
4469
   * @param  string $strAddOn
4470
   *
4471
   * @return string
4472
   */
4473
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to cut, nothing to append
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the last allowed character is a space: cut right in front of it
    $lastAllowedChar = self::substr($str, $length - 1, 1);
    if ($lastAllowedChar === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // cut at the limit, then throw away the last (possibly broken) word
    $str = self::substr($str, 0, $length);
    $words = explode(' ', $str);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    // something is left: that is the result
    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // nothing is left (no space within the limit): fall back to a plain cut
    return self::substr($str, 0, $length - 1) . $strAddOn;
  }
4505
  /**
4506
   * Pad a UTF-8 string to given length with another string.
4507
   *
4508
   * @param    string $input      The input string
4509
   * @param    int    $pad_length The length of return string
4510
   * @param    string $pad_string String to use for padding the input string
4511
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
4512 12
   *
4513
   * @return   string Returns the padded string
4514 12
   */
4515
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    // only a positive integer that is not smaller than the current length leads to padding
    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $input_length) {
      return $input;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad string cannot fill anything and would cause a division by zero below
    if ($ps_length < 1) {
      return $input;
    }

    // number of characters that are missing
    $diff = $pad_length - $input_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        // repeat the pad string often enough, then cut to the exact length
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the rounded-down half, the right side the rounded-up half;
        // the division has to happen BEFORE the cast, "(int)$diff / 2" yields a float like 2.5
        $half = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $pre = self::substr($half, 0, (int)floor($diff / 2));
        $post = self::substr($half, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $input . $post;
  }
4550 1
4551 1
  /**
4552 1
   * Repeat a string.
4553 1
   *
4554
   * @param string $input      <p>
4555
   *                           The string to be repeated.
4556 1
   *                           </p>
4557
   * @param int    $multiplier <p>
4558
   *                           Number of time the input string should be
4559
   *                           repeated.
4560
   *                           </p>
4561
   *                           <p>
4562
   *                           multiplier has to be greater than or equal to 0.
4563
   *                           If the multiplier is set to 0, the function
4564
   *                           will return an empty string.
4565
   *                           </p>
4566
   *
4567 17
   * @return string the repeated string.
4568
   */
4569
  public static function str_repeat($input, $multiplier)
  {
    // the input goes through self::filter() first, the repetition itself is done by the native function
    $filtered = self::filter($input);

    return str_repeat($filtered, $multiplier);
  }
4575
4576
  /**
4577
   * INFO: this is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe
4578 17
   *
4579 17
   * (PHP 4, PHP 5)<br/>
4580 17
   * Replace all occurrences of the search string with the replacement string
4581 17
   *
4582 17
   * @link http://php.net/manual/en/function.str-replace.php
4583 16
   *
4584 16
   * @param mixed $search  <p>
4585 17
   *                       The value being searched for, otherwise known as the needle.
4586
   *                       An array may be used to designate multiple needles.
4587
   *                       </p>
4588
   * @param mixed $replace <p>
4589
   *                       The replacement value that replaces found search
4590 17
   *                       values. An array may be used to designate multiple replacements.
4591 17
   *                       </p>
4592
   * @param mixed $subject <p>
4593
   *                       The string or array being searched and replaced on,
4594 1
   *                       otherwise known as the haystack.
4595 1
   *                       </p>
4596
   *                       <p>
4597
   *                       If subject is an array, then the search and
4598 1
   *                       replace is performed with every entry of
4599 1
   *                       subject, and the return value is an array as
4600 1
   *                       well.
4601 1
   *                       </p>
4602 1
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4603
   *
4604 1
   * @return mixed This function returns a string or an array with the replaced values.
4605
   */
4606 1
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation to the native function; $count is filled by reference
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4610
4611
  /**
4612
   * Shuffles all the characters in the string.
4613
   *
4614
   * @param    string $str The input string
4615
   *
4616 1
   * @return   string The shuffled string.
4617
   */
4618 1
  public static function str_shuffle($str)
  {
    // split via self::split(), randomise the pieces in place, then glue them back together
    $pieces = self::split($str);
    shuffle($pieces);

    return implode('', $pieces);
  }
4626 1
4627
  /**
4628
   * Sort all characters according to code points.
4629 1
   *
4630 1
   * @param    string $str    A UTF-8 string.
4631 1
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
4632
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
4633 1
   *
4634
   * @return   string String of sorted characters
4635
   */
4636
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    // flipping twice drops repeated values (values become keys and back again)
    if ($unique) {
      $codePoints = array_flip(array_flip($codePoints));
    }

    // value sort that keeps the keys; ascending unless $desc is set
    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4652
4653
  /**
4654 8
   * Convert a string to an array.
4655
   *
4656 8
   * @param string $str
4657
   * @param int    $len
4658 8
   *
4659
   * @return array
4660 8
   */
4661 2
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    // chunk sizes below 1 are handed to the native function unchanged
    if ($len < 1) {
      return str_split($str, $len);
    }

    if (self::$support['intl'] !== true) {
      // regex-based extraction using the polyfill's grapheme cluster pattern
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $clusters = $matches[0];
    } else {
      $clusters = array();
      $byteOffset = 0;
      $byteLength = strlen($str);
      while ($byteOffset < $byteLength) {
        // $byteOffset is both the start position and the by-reference "next" argument,
        // so every call moves it forward
        $clusters[] = \grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $byteOffset, $byteOffset);
      }
    }

    if ($len === 1) {
      return $clusters;
    }

    // concatenate every $len consecutive pieces into one output element
    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len) {
        $chunks[$chunkIndex] .= $cluster;
      } else {
        $chunks[++$chunkIndex] = $cluster;
      }
    }

    return $chunks;
  }
4701
4702
  /**
4703
   * Get a binary representation of a specific character.
4704
   *
4705
   * @param   string $str The input character.
4706
   *
4707
   * @return  string
4708
   */
4709
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    // nothing to convert
    if (!isset($str[0])) {
      return '';
    }

    // walk the string byte by byte; each self::ord() result is formatted with '%08b'
    // (binary, zero-padded to at least eight digits) and appended
    $bits = '';
    $byteCount = strlen($str);
    $index = 0;

    while ($index < $byteCount) {
      $bits .= vsprintf('%08b', (array)self::ord($str[$index]));
      ++$index;
    }

    return $bits;
  }
4728 2
4729
  /**
4730 2
   * US-ASCII transliterations of Unicode text.
4731 2
   *
4732 1
   * Port of Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
4733
   * Warning: you should only pass this well formed UTF-8!
4734 2
   * Be aware it works by making a copy of the input string which it appends transliterated
4735
   * characters to - it uses a PHP output buffer to do this - it means, memory use will increase,
4736 4
   * requiring up to the same amount again as the input string
4737 4
   *
4738 4
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
4739 4
   *
4740 1
   * @author <[email protected]>
4741
   *
4742 7
   * @param string $str     UTF-8 string to convert
4743
   * @param string $unknown Character to use if a character is unknown. (default is ?)
4744 7
   *
4745
   * @return string US-ASCII string
4746
   */
4747
  public static function str_transliterate($str, $unknown = '?')
  {
    // Lookup tables, one per "bank" (code point >> 8), kept across calls.
    // NOTE(review): the variable name must not change - the files under data/ are pulled in
    // with "require" into this scope and presumably assign $UTF8_TO_ASCII[<bank>]; confirm
    // against the data files.
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    // split into single characters ("u" modifier => one match per UTF-8 character)
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // The lead byte determines the sequence length and contributes the top bits.
      // $ord is assigned in every branch, so no value can leak in from a previous character.
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $length = 2;
        $ord = $ordC0 - 192;
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $length = 3;
        $ord = $ordC0 - 224;
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $length = 4;
        $ord = $ordC0 - 240;
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $length = 5;
        $ord = $ordC0 - 248;
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $length = 6;
        $ord = $ordC0 - 252;
      } else {
        // 128-191 (continuation byte without a lead byte) or 254-255: no code point
        $c = $unknown;
        continue;
      }

      // truncated sequence: never read past the end of the character
      if (!isset($c[$length - 1])) {
        $c = $unknown;
        continue;
      }

      // every following byte contributes six more bits
      for ($i = 1; $i < $length; ++$i) {
        $ord = $ord * 64 + (ord($c[$i]) - 128);
      }

      // load the table for this bank on first use
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        // no file, or the file did not fill the slot: remember an empty table
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // the low byte of the code point is the index inside the bank
      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
4839
4840
  /**
4841
   * Counts number of words in the UTF-8 string.
4842
   *
4843
   * @param string $str The input string.
4844
   * @param int $format <strong>0</strong> => return a number of words<br />
4845
   *                    <strong>1</strong> => return an array of words
4846
   *                    <strong>2</strong> => return an array of words with word-offset as key
4847
   * @param string $charlist
4848
   *
4849
   * @return array|float The number of words in the string
4850 2
   */
4851
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');

    // with PREG_SPLIT_DELIM_CAPTURE the captured words end up at the odd indexes,
    // the text between them at the even ones
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    $len = count($strParts);

    // format 1: plain list of words
    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    // format 2: words keyed by their character offset
    if ($format === 2) {
      self::checkForSupport();

      $wordsByOffset = array();
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $wordsByOffset[$offset] = $strParts[$i];
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

      return $wordsByOffset;
    }

    // any other format: number of words
    return ($len - 1) / 2;
  }
4884
4885
  /**
4886
   * Case-insensitive string comparison.
4887
   *
4888
   * @param string $str1
4889
   * @param string $str2
4890
   *
4891
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
4892
   */
4893
  public static function strcasecmp($str1, $str2)
  {
    // case-fold both operands, then reuse the case-sensitive comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4897 2
4898
  /**
4899 2
   * String comparison.
4900
   *
4901
   * @param string $str1
4902
   * @param string $str2
4903
   *
4904
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
4905
   *              <strong>> 0</strong> if str1 is greater than str2<br />
4906
   *              <strong>0</strong> if they are equal.
4907
   */
4908
  public static function strcmp($str1, $str2)
  {
    // identical after string conversion: no normalization needed
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // otherwise compare the NFD-normalized forms with the native byte-wise strcmp()
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4915
4916
  /**
4917
   * Find length of initial segment not matching mask.
4918
   *
4919
   * @param string $str
4920
   * @param string $charList
4921
   * @param int    $offset
4922
   * @param int    $length
4923
   *
4924
   * @return int|null
4925 8
   */
4926
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    // force a string; an empty character list yields null
    $charList .= '';
    if ('' === $charList) {
      return null;
    }

    // only cut the string when an offset or a non-default length was requested
    if (!$offset && 2147483647 === $length) {
      $str = (string)$str;
    } else {
      $str = (string)self::substr($str, $offset, $length);
    }

    // lazily capture everything in front of the first character from the list
    if (!preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      // none of the characters occurs: the whole string counts
      return self::strlen($str);
    }

    /** @noinspection OffsetOperationsInspection */
    return self::strlen($matches[1]);
  }
4945 1
4946
  /**
4947 7
   * Makes a UTF-8 string from code points.
4948
   *
4949
   * @param    array $array Integer or Hexadecimal codepoints
4950
   *
4951
   * @return   string UTF-8 encoded string
4952
   */
4953
  public static function string($array)
  {
    // convert every entry with UTF8::chr() and concatenate the results
    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );
    $characters = array_map($toChar, $array);

    return implode($characters);
  }
4965
4966 5
  /**
4967
   * Checks if string starts with "UTF-8 BOM" character.
4968 5
   *
4969
   * @param    string $str The input string.
4970
   *
4971
   * @return   bool True if the string has BOM at the start, False otherwise.
4972
   */
4973
  public static function string_has_bom($str)
  {
    // only the first three bytes are handed to self::is_bom()
    $leadingBytes = substr($str, 0, 3);

    return self::is_bom($leadingBytes);
  }
4977
4978
  /**
4979
   * Strip HTML and PHP tags from a string.
4980
   *
4981
   * @link http://php.net/manual/en/function.strip-tags.php
4982
   *
4983
   * @param string $str            <p>
4984
   *                               The input string.
4985 66
   *                               </p>
4986
   * @param string $allowable_tags [optional] <p>
4987 66
   *                               You can use the optional second parameter to specify tags which should
4988
   *                               not be stripped.
4989 66
   *                               </p>
4990 4
   *                               <p>
4991
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
4992
   *                               can not be changed with allowable_tags.
4993
   *                               </p>
4994 65
   *
4995
   * @return string the stripped string.
4996
   */
4997 65
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 before the native function sees the string
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
5004
5005 65
  /**
5006
   * Finds position of first occurrence of a string within another, case insensitive.
5007
   *
5008
   * @link http://php.net/manual/en/function.mb-stripos.php
5009
   *
5010
   * @param string  $haystack  <p>
5011
   *                           The string from which to get the position of the first occurrence
5012
   *                           of needle
5013
   *                           </p>
5014
   * @param string  $needle    <p>
5015
   *                           The string to find in haystack
5016
   *                           </p>
5017 1
   * @param int     $offset    [optional] <p>
5018
   *                           The position in haystack
5019 1
   *                           to start searching
5020
   *                           </p>
5021
   * @param string  $encoding
5022
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
5023
   *
5024
   * @return int Return the numeric position of the first occurrence of
5025
   * needle in the haystack
5026
   * string, or false if needle is not found.
5027
   */
5028
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or needle can never match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // The default offset is null, but \mb_stripos() takes an integer offset
    // (passing null there is deprecated as of PHP 8.1) - same cast as in strrpos().
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    }

    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5052
5053
  /**
5054
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
5055
   *
5056
   * @param string $str
5057
   * @param string $needle
5058
   * @param bool   $before_needle
5059
   *
5060
   * @return false|string
5061
   */
5062
  public static function stristr($str, $needle, $before_needle = false)
  {
    // force a string; an empty needle never matches
    $needle .= '';
    if ('' === $needle) {
      return false;
    }

    // init
    self::checkForSupport();

    $result = \mb_stristr($str, $needle, $before_needle, 'UTF-8');

    return $result;
  }
5073
5074
  /**
5075
   * Get the string length, not the byte-length!
5076
   *
5077
   * @link     http://php.net/manual/en/function.mb-strlen.php
5078
   *
5079
   * @param string  $str       The string being checked for length.
5080
   * @param string  $encoding  Set the charset for e.g. "\mb_" function
5081
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
5082
   *
5083
   * @return int the number of characters in
5084
   *           string str having character encoding
5085
   *           encoding. A multi-byte character is
5086
   *           counted as 1.
5087
   */
5088
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // the empty string has length 0
    if (!isset($str[0])) {
      return 0;
    }

    // init
    self::checkForSupport();

    // INFO: this is only a fallback for old versions
    // (a boolean in the $encoding slot is replaced by the default charset)
    if (is_bool($encoding)) {
      $encoding = 'UTF-8';
    }

    // cleaning is only applied to UTF-8 input
    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5110
5111
  /**
5112
   * Case insensitive string comparisons using a "natural order" algorithm.
5113 10
   *
5114 10
   * @param string $str1
5115
   * @param string $str2
5116
   *
5117
   * @return int <strong>< 0</strong> if str1 is less than str2<br />
5118 10
   *             <strong>> 0</strong> if str1 is greater than str2<br />
5119
   *             <strong>0</strong> if they are equal
5120
   */
5121
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both operands, then reuse the case-sensitive natural-order comparison
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5125
5126 1
  /**
5127 1
   * String comparisons using a "natural order" algorithm
5128 1
   *
5129
   * @link  http://php.net/manual/en/function.strnatcmp.php
5130 10
   *
5131
   * @param string $str1 <p>
5132
   *                     The first string.
5133 10
   *                     </p>
5134 1
   * @param string $str2 <p>
5135 1
   *                     The second string.
5136
   *                     </p>
5137 10
   *
5138
   * @return int Similar to other string comparison functions, this one returns &lt; 0 if
5139
   * str1 is less than str2; &gt;
5140
   * 0 if str1 is greater than
5141
   * str2, and 0 if they are equal.
5142
   * @since 4.0
5143
   * @since 5.0
5144
   */
5145
  public static function strnatcmp($str1, $str2)
  {
    // identical after string conversion: equal
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // otherwise let the native function compare the self::strtonatfold() forms
    $natural1 = self::strtonatfold($str1);
    $natural2 = self::strtonatfold($str2);

    return strnatcmp($natural1, $natural2);
  }
5149
5150
  /**
5151
   * Binary safe case-insensitive string comparison of the first n characters
5152
   *
5153
   * @link  http://php.net/manual/en/function.strncasecmp.php
5154
   *
5155
   * @param string $str1 <p>
5156
   *                     The first string.
5157
   *                     </p>
5158
   * @param string $str2 <p>
5159
   *                     The second string.
5160
   *                     </p>
5161
   * @param int    $len  <p>
5162
   *                     The length of strings to be used in the comparison.
5163
   *                     </p>
5164
   *
5165
   * @return int &lt; 0 if <i>str1</i> is less than
5166
   * <i>str2</i>; &gt; 0 if <i>str1</i> is
5167
   * greater than <i>str2</i>, and 0 if they are equal.
5168
   * @since 4.0.4
5169
   * @since 5.0
5170
   */
5171
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both operands, then compare their first $len characters
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5175
5176
  /**
5177
   * Binary safe string comparison of the first n characters
5178
   *
5179
   * @link  http://php.net/manual/en/function.strncmp.php
5180
   *
5181
   * @param string $str1 <p>
5182
   *                     The first string.
5183
   *                     </p>
5184
   * @param string $str2 <p>
5185
   *                     The second string.
5186 1
   *                     </p>
5187
   * @param int    $len  <p>
5188 1
   *                     Number of characters to use in the comparison.
5189
   *                     </p>
5190 1
   *
5191
   * @return int &lt; 0 if <i>str1</i> is less than
5192
   * <i>str2</i>; &gt; 0 if <i>str1</i>
5193
   * is greater than <i>str2</i>, and 0 if they are
5194
   * equal.
5195
   * @since 4.0
5196
   * @since 5.0
5197
   */
5198
  public static function strncmp($str1, $str2, $len)
  {
    // cut both operands with self::substr() and compare the heads
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5202 4
5203
  /**
5204
   * Search a string for any of a set of characters
5205
   *
5206
   * @link  http://php.net/manual/en/function.strpbrk.php
5207
   *
5208
   * @param string $haystack  <p>
5209
   *                          The string where char_list is looked for.
5210
   *                          </p>
5211
   * @param string $char_list <p>
5212
   *                          This parameter is case sensitive.
5213
   *                          </p>
5214
   *
5215
   * @return string a string starting from the character found, or false if it is
5216
   * not found.
5217
   * @since 5.0
5218
   */
5219
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // nothing to search in / for
    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    // first character of the haystack that belongs to the list
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $found)) {
      return false;
    }

    // return the rest of the haystack, starting at the byte position of that character
    $bytePosition = strpos($haystack, $found[0]);

    return substr($haystack, $bytePosition);
  }
5234
5235 1
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack.
   *                              Or a code point as int.
   *                              </p>
   * @param int        $offset    [optional] <p>
   *                              The search offset, counted from the beginning of haystack.
   *                              If it is not specified, 0 is used.
   *                              </p>
   * @param string     $encoding  Charset handed to "\mb_strpos()".
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support integer $needle
    // INFO: this test has to run before $needle is cast to a string, otherwise the strict
    //       comparison can never be true (strrpos() uses the same order)
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): this branch is guarded by the "iconv" flag but calls grapheme_strpos() - confirm
    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback: byte-wise search, then translate the byte position into a character position

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // negative offset not supported in PHP strpos(), ignoring
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5311
5312
  /**
5313
   * Finds the last occurrence of a character in a string within another.
5314
   *
5315
   * @link http://php.net/manual/en/function.mb-strrchr.php
5316
   *
5317
   * @param string $haystack <p>
5318
   *                         The string from which to get the last occurrence
5319
   *                         of needle
5320
   *                         </p>
5321
   * @param string $needle   <p>
5322
   *                         The string to find in haystack
5323
   *                         </p>
5324
   * @param bool   $part     [optional] <p>
5325
   *                         Determines which portion of haystack
5326
   *                         this function returns.
5327
   *                         If set to true, it returns all of haystack
5328
   *                         from the beginning to the last occurrence of needle.
5329
   *                         If set to false, it returns all of haystack
5330
   *                         from the last occurrence of needle to the end,
5331
   *                         </p>
5332
   * @param string $encoding [optional] <p>
5333
   *                         Character encoding name to use.
5334
   *                         If it is omitted, internal character encoding is used.
5335
   *                         </p>
5336
   *
5337
   * @return string the portion of haystack.
5338
   * or false if needle is not found.
5339 6
   */
5340
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    // pure delegation to the mbstring function
    $portion = \mb_strrchr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5346
5347
  /**
5348
   * Reverses characters order in the string.
5349
   *
5350
   * @param    string $str The input string
5351
   *
5352
   * @return   string The string with characters in the reverse sequence
5353
   */
5354
  public static function strrev($str)
  {
    // split via self::split(), flip the order of the pieces, join them again
    $reversed = array_reverse(self::split($str));

    return implode($reversed);
  }
5358
5359
  /**
5360
   * Finds the last occurrence of a character in a string within another, case insensitive.
5361
   *
5362
   * @link http://php.net/manual/en/function.mb-strrichr.php
5363
   *
5364
   * @param string $haystack <p>
5365
   *                         The string from which to get the last occurrence
5366 1
   *                         of needle
5367
   *                         </p>
5368 1
   * @param string $needle   <p>
5369
   *                         The string to find in haystack
5370 1
   *                         </p>
5371
   * @param bool   $part     [optional] <p>
5372
   *                         Determines which portion of haystack
5373
   *                         this function returns.
5374
   *                         If set to true, it returns all of haystack
5375
   *                         from the beginning to the last occurrence of needle.
5376
   *                         If set to false, it returns all of haystack
5377
   *                         from the last occurrence of needle to the end,
5378
   *                         </p>
5379
   * @param string $encoding [optional] <p>
5380
   *                         Character encoding name to use.
5381
   *                         If it is omitted, internal character encoding is used.
5382
   *                         </p>
5383 10
   *
5384
   * @return string the portion of haystack.
5385 10
   * or false if needle is not found.
5386 10
   */
5387 10
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    // pure delegation to the mbstring function
    $portion = \mb_strrichr($haystack, $needle, $part, $encoding);

    return $portion;
  }
5393
5394 10
  /**
5395
   * Find position of last occurrence of a case-insensitive string.
5396 10
   *
5397
   * @param    string $haystack The string to look in
5398 10
   * @param    string $needle   The string to look for
5399 1
   * @param    int    $offset   (Optional) Number of characters to ignore in the beginning or end
5400 1
   *
5401
   * @return   int|false The position of the last occurrence of needle, or false if it is not found
5402
   */
5403 10
  public static function strripos($haystack, $needle, $offset = 0)
  {
    // lower-case both operands, then reuse the case-sensitive search
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
5407
5408 10
  /**
5409
   * Find position of last occurrence of a string in a string.
5410
   *
5411
   * @link http://php.net/manual/en/function.mb-strrpos.php
5412
   *
5413
   * @param string     $haystack  <p>
5414
   *                              The string being checked, for the last occurrence
5415
   *                              of needle
5416
   *                              </p>
5417
   * @param string|int $needle    <p>
5418
   *                              The string to find in haystack.
5419
   *                              Or a code point as int.
5420
   *                              </p>
5421
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of characters into
5422
   *                              the string. Negative values will stop searching at an arbitrary point
5423
   *                              prior to the end of the string.
5424 20
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
5425
   *
5426 20
   * @return int the numeric position of
5427
   * the last occurrence of needle in the
5428 20
   * haystack string. If
5429 5
   * needle is not found, it returns false.
5430
   */
5431
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // a non-negative integer needle is converted with self::chr() before the string cast
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    // an empty haystack or needle can never match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback

    // positive offset: drop the leading characters; negative offset: drop the trailing ones
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePosition = strrpos($haystack, $needle);
    if ($bytePosition === false) {
      return false;
    }

    // translate the byte position into a character position
    $prefix = substr($haystack, 0, $bytePosition);

    // negative offset not supported in PHP strpos(), ignoring
    return ($offset > 0 ? $offset : 0) + self::strlen($prefix);
  }
5483
5484
  /**
   * Length of the leading run of $str that is made up only of characters from $mask.
   *
   * @param string $str    The string to examine.
   * @param string $mask   The allowed characters (turned into a regex class by self::rxClass()).
   * @param int    $offset Start position; when non-zero the string is cut with self::substr() first.
   * @param int    $length Maximum length to examine; 2147483647 means "no limit".
   *
   * @return int|null
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $needsCut = $offset || 2147483647 !== $length;
    if ($needsCut) {
      $str = self::substr($str, $offset, $length);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (preg_match($pattern, $str, $matches)) {
      return self::strlen($matches[0]);
    }

    return 0;
  }
5503 1
5504
  /**
   * Get the part of the haystack starting at the first occurrence of the needle.
   *
   * Delegates to grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      <p>
   *                              The input string. Must be valid UTF-8.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to look for. Must be valid UTF-8.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack before the first
   *                              occurrence of the needle is returned (excluding the needle).
   *                              </p>
   *
   * @return string the portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5528
5529
  /**
   * Case-fold a string so it can be used for case-less matching.
   *
   * The common case-fold table is always applied; the full table (loaded on demand
   * via self::getData('caseFolding_full')) is applied only when $full is truthy.
   * The result is cleaned and lowercased.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str
   * @param bool   $full
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup tables are built once per request and cached in static locals
    static $fullTable = null;
    static $commonFrom = null;
    static $commonTo = null;

    if ($commonFrom === null) {
      $commonFrom = array_keys(self::$commonCaseFold);
      $commonTo = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonFrom, $commonTo, $str);

    if ($full) {
      if ($fullTable === null) {
        $fullTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search strings, index 1 the replacements
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    return self::strtolower(self::clean($folded));
  }
5566
5567 37
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      <p>
   *                         The string being lowercased.
   *                         </p>
   * @param string $encoding The encoding passed on to mb_strtolower().
   *
   * @return string str with all alphabetic characters converted to lowercase;
   *                an empty string for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $lowered = \mb_strtolower($str, $encoding);

    return $lowered;
  }
5593
5594
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and all non-spacing marks (\p{Mn}) are removed.
   *
   * @param string $s
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5605
5606
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when mbstring is available; otherwise the string is cleaned
   * and translated with the table returned by self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding The encoding passed on to mb_strtoupper().
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // fallback translation table, split into search / replace lists on first use
    static $tableFrom = null;
    static $tableTo = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, $encoding);
    }

    // fallback

    if ($tableFrom === null) {
      $table = self::case_table();
      $tableFrom = array_keys($table);
      $tableTo = array_values($table);
    }

    return str_replace($tableFrom, $tableTo, self::clean($str));
  }
5649
5650
  /**
   * Translate characters or replace sub-strings.
   *
   * With three arguments, $from and $to are split into characters with
   * self::str_split(), trimmed to the same number of characters and used as a
   * character-to-character map. With two arguments, $from is handed to strtr()
   * unchanged and is expected to be a replacement map.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  <p>
   *                           The string being translated.
   *                           </p>
   * @param string|array $from <p>
   *                           The characters to replace, or a replacement map
   *                           when $to is omitted.
   *                           </p>
   * @param string|array $to   <p>
   *                           The characters to replace with.
   *                           </p>
   *
   * @return string A copy of str in which every occurrence of each character in
   *                from is replaced by the corresponding character in to.
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF === $to) {
      return strtr($str, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);

    $fromCount = count($fromChars);
    $toCount = count($toChars);

    // surplus characters on either side have no counterpart and are dropped
    if ($fromCount !== $toCount) {
      $shared = min($fromCount, $toCount);
      $fromChars = array_slice($fromChars, 0, $shared);
      $toChars = array_slice($toChars, 0, $shared);
    }

    $map = array_combine($fromChars, $toChars);

    return strtr($str, $map);
  }
5691 1
5692 1
  /**
   * Return the width of a string, as reported by mb_strwidth() for UTF-8.
   *
   * @param string $s
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5706
5707 1
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>
   *                           The string being checked.
   *                           </p>
   * @param int     $start     <p>
   *                           The first position used in str.
   *                           </p>
   * @param int     $length    [optional] <p>
   *                           The maximum length of the returned string;
   *                           null means "the length of str".
   *                           </p>
   * @param string  $encoding  Encoding handed to mb_substr(); a boolean is
   *                           replaced by 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string The portion of str specified by the start and length
   *                parameters; an empty string for empty input.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    $length = $length === null ? (int)self::strlen($str) : (int)$length;

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if (is_bool($encoding)) {
        $encoding = 'UTF-8';
      }

      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    $part = array_slice($chars, $start, $length);

    return implode($part);
  }
5773
5774
  /**
   * Compare a slice of $main_str with the start of $str.
   *
   * $main_str is cut to ($offset, $length); $str is cut to the length of that slice.
   * Both parts are then compared with self::strcmp() or, if requested, self::strcasecmp().
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
   *                                    end of the string.
   * @param int     $length             The length of the comparison.
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
5794
5795
  /**
   * Count the number of substring occurrences
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack <p>
   *                         The string to search in
   *                         </p>
   * @param string $needle   <p>
   *                         The substring to search for
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The offset where to start counting
   *                         </p>
   * @param int    $length   [optional] <p>
   *                         The maximum length after the specified offset to search for the
   *                         substring. null (the default) means "up to the end of the haystack".
   *                         </p>
   *
   * @return int The number of occurrences; 0 if haystack or needle is empty.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return 0;
    }

    if ($offset || $length !== null) {
      $offset = (int)$offset;

      // keep null as null: casting it to 0 would cut the haystack down to an
      // empty string whenever only an offset is given
      if ($length !== null) {
        $length = (int)$length;
      }

      $haystack = self::substr($haystack, $offset, $length);

      // nothing left to search in
      if (!isset($haystack[0])) {
        return 0;
      }
    }

    self::checkForSupport();

    return \mb_substr_count($haystack, $needle);
  }
5839 3
5840 3
  /**
   * Replace text within a portion of a string.
   *
   * For an array $str the call is applied to every element; $replacement, $start and
   * $length may then be arrays as well (one entry per element) or scalars that are
   * reused for every element.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|array   $str
   * @param string|array   $replacement
   * @param int|array      $start
   * @param null|int|array $length For a string $str, null means "replace up to the end".
   *
   * @return array|string
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: non-integer entries fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      // NOTE(review): an omitted length becomes 0 (pure insertion) here, whereas the
      // string branch below treats an omitted length as "up to the end" - confirm
      // this difference is intended.
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // a string subject can only take one replacement: use the first one
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both strings into UTF-8 characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // the character count is already known from the split above, which also
      // avoids depending on the mbstring internal encoding
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
5917
5918
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is uppercased; if uppercasing leaves it
   * unchanged, it is lowercased instead.
   *
   * @param string $str
   * @param string $encoding
   *
   * @return string each character's case swapped
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $swap = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // already uppercase (or caseless): go the other way
      if ($char === $upper) {
        return UTF8::strtolower($char, $encoding);
      }

      return $upper;
    };

    return preg_replace_callback('/[\S]/u', $swap, self::clean($str));
  }
5952
5953 20
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Passed through to to_ascii() unchanged.
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
5965 5
5966 5
  /**
   * alias for "UTF8::to_latin1()"
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
5977 8
5978
  /**
   * alias for "UTF8::to_utf8()"
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
5989
5990
  /**
   * convert to ASCII
   *
   * Input without bytes >= 0x80 is returned as-is (after cleaning). Otherwise the
   * string is NFKC-normalized and every multi-byte character is transliterated:
   * first via iconv, then via the 'translit_extra' data table, then via the base
   * byte of its NFD decomposition, and finally via self::str_transliterate().
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Replacement for characters that cannot be converted.
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // pure 7-bit input: nothing to do
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = \Normalizer::normalize($s, \Normalizer::NFKC);

    $isGlibc = 'glibc' === ICONV_IMPL;

    // split into characters; the "s" modifier makes "." match line breaks too,
    // otherwise newlines would silently vanish from the result
    preg_match_all('/./us', $s, $matches);
    $chars = $matches[0];

    foreach ($chars as $idx => $char) {

      // single-byte characters are already ASCII
      if (!isset($char[1])) {
        continue;
      }

      if ($isGlibc) {
        $ascii = iconv('UTF-8', 'ASCII//TRANSLIT', $char);
      } else {
        $ascii = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $char);

        if ($ascii !== false && is_string($ascii)) {
          if (!isset($ascii[0])) {
            // iconv produced nothing: mark as "unknown" for the steps below
            $ascii = '?';
          } elseif (isset($ascii[1])) {
            // strip leading accent markers from multi-character results
            $ascii = ltrim($ascii, '\'`"^~');
          }
        }
      }

      if ('?' === $ascii) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$char])) {
          $ascii = $translitExtra[$char];
        } else {
          // keep the first byte of the decomposed form if it is ASCII
          $decomposed = \Normalizer::normalize($char, \Normalizer::NFD);

          if (is_string($decomposed) && isset($decomposed[0]) && $decomposed[0] < "\x80") {
            $ascii = $decomposed[0];
          } else {
            $ascii = $subst_chr;
          }
        }
      }

      // still unknown (also the case when $subst_chr itself is "?"): last resort
      if ('?' === $ascii) {
        $ascii = self::str_transliterate($char, $subst_chr);
      }

      $chars[$idx] = $ascii;
    }

    return implode('', $chars);
  }
6069
6070
  /**
   * alias for "UTF8::to_win1252()"
   *
   * @param   string|array $str
   *
   * @return  array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6081
6082 22
  /**
   * alias for "UTF8::to_win1252()"
   *
   * @param string|array $str
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6093
6094
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte-level conversion, "\uXXXX" escape sequences and numeric HTML
   * entities with 2-4 digits ("&#NNN;") are decoded as well.
   *
   * @param string|array $str Any string or array.
   *
   * @return string|array The same string, but UTF8 encoded; for an array, the array
   *                      with every element converted.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    // length in bytes, the loop below works on raw bytes
    $max = self::strlen($str, '8bit');

    $buf = '';
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // how many continuation bytes a UTF-8 lead byte of this value announces
        if ($c1 <= "\xdf") {
          $following = 1; // looks like 2 bytes UTF8
        } elseif ($c1 <= "\xef") {
          $following = 2; // looks like 3 bytes UTF8
        } elseif ($c1 <= "\xf7") {
          $following = 3; // looks like 4 bytes UTF8
        } else {
          $following = 0; // doesn't look like UTF8, but should be converted
        }

        // every announced continuation byte must be in 0x80-0xBF;
        // bytes past the end of the string count as "\x00"
        $sequence = $c1;
        $isUtf8 = $following > 0;
        for ($j = 1; $j <= $following; $j++) {
          $next = $i + $j >= $max ? "\x00" : $str[$i + $j];

          if ($next < "\x80" || $next > "\xbf") {
            $isUtf8 = false;
            break;
          }

          $sequence .= $next;
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $following;
        } else { // not valid UTF8 - convert the single byte to a 2-byte sequence
          $cc1 = (chr(ord($c1) >> 6) | "\xc0");
          $cc2 = (($c1 & "\x3f") | "\x80");
          $buf .= $cc1 . $cc2;
        }

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $cc1 = (chr($ordC1 >> 6) | "\xc0");
          $cc2 = (($c1 & "\x3f") | "\x80");
          $buf .= $cc1 . $cc2;
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6221
6222
  /**
   * Convert a string into "win1252"-encoding.
   *
   * Arrays are converted element by element (recursively); strings are handed
   * to self::utf8_decode().
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  protected static function to_win1252($str)
  {
    if (is_array($str)) {
      $converted = $str;

      foreach ($str as $key => $value) {
        $converted[$key] = self::to_win1252($value);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    return self::utf8_decode($str);
  }
6249
6250
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * The native function could only be used for pure 7-bit (ASCII) input, but checking
   * for that would cost more time than it saves.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted or empty,
   *                         Unicode separators (\pZ) and control characters (\pC) are stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // explicit character list: delegate to the one-sided variants
    if ($chars !== INF && $chars) {
      $leftTrimmed = self::ltrim($str, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6278
6279
  /**
   * Makes string's first char uppercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string
   */
  public static function ucfirst($str)
  {
    $first = self::substr($str, 0, 1);
    $rest = self::substr($str, 1);

    return self::strtoupper($first) . $rest;
  }
6290
6291
  /**
   * alias for "UTF8::ucfirst"
   *
   * @param string $str
   *
   * @return string
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
6302
6303
  /**
   * Uppercase for all words in the string.
   *
   * Words are separated by a single space. Words listed in $exceptions are left
   * untouched, but the first character of the whole result is always uppercased.
   *
   * @param string $str
   * @param array  $exceptions Words that must not be changed (strict comparison).
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    if (!$str) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;

    $result = array();
    foreach (explode(' ', $str) as $word) {
      if ($useExceptions && in_array($word, $exceptions, true)) {
        // excepted word: keep as-is
        $result[] = $word;
        continue;
      }

      $result[] = self::ucfirst($word);
    }

    return self::ucfirst(implode(' ', $result));
  }
6344
6345
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str The (possibly multiple times) encoded string
   *
   * @return string The decoded string, or an empty string for empty input
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // First pass: plain url-decoding, then turn "%uXXXX" sequences into
    // hexadecimal html entities so the entity decoder below can resolve them.
    $decoded = urldecode($str);
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $decoded);

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4.
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // $decoded is a string here, so to_utf8() is called with a string argument.
    $utf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($utf8, $flags);

    // Second url-decoding pass for what the entity decoding uncovered, followed
    // by the repair of simple broken UTF-8 sequences.
    $fixed = self::fix_simple_utf8(rawurldecode($withoutEntities));

    return (string)$fixed;
  }
6385
6386
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The keys are the url-encoded bytes '%20' ... '%FF' (upper-case hex), the
   * values are the matching characters as UTF-8 strings.
   *
   * Bytes that are not assigned in Windows-1252 (%81, %8D, %8F, %90, %9D) map to
   * an empty string.
   *
   * NOTE(review): %7F (DEL), %A0 (NO-BREAK SPACE) and %AD (SOFT HYPHEN) also map
   * to an empty string here -- confirm whether that is intended or whether
   * invisible characters got lost in this table.
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (it was mapped to a backtick,
        // which is already the value of '%60').
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6622
6623
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through UTF8::to_utf8(); afterwards every key of
   * self::$utf8ToWin1252 is replaced by its value and the result is handed to
   * Xml::utf8_decode().
   *
   * @param string $str
   *
   * @return string The decoded string, or an empty string for empty input
   */
  public static function utf8_decode($str)
  {
    // search / replace lists, built once per request from self::$utf8ToWin1252
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($str);

    if (null === $searchCache) {
      $searchCache = array_keys(self::$utf8ToWin1252);
      $replaceCache = array_values(self::$utf8ToWin1252);
    }

    $prepared = str_replace($searchCache, $replaceCache, $utf8);

    return Xml::utf8_decode($prepared);
  }
6653
6654
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "\utf8_encode()" call, every key of self::$cp1252ToUtf8
   * that occurs in the result is replaced by its value. That replacement is
   * skipped when the encoded string contains no "\xC2" byte at all.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists, built once per request from self::$cp1252ToUtf8
    static $search = null;
    static $replace = null;

    $encoded = \utf8_encode($str);

    // Nothing to fix without a "\xC2" byte in the encoded string.
    if (false === strpos($encoded, "\xC2")) {
      return $encoded;
    }

    if (null === $search) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6680
6681
  /**
   * fix -> utf8-win1252 chars
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as if it was ISO-8859-1
   * (ignoring the Windows-1252 chars from 80 to 9F), use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * This method only forwards to UTF8::fix_simple_utf8().
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6698
6699
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys,
   *         as defined in the above URL (the content of self::$whitespaceTable)
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6713
6714
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. When the string holds more
   * than $words words, it is cut after the last allowed word, right-trimmed and
   * $strAddOn is appended; otherwise it is returned unchanged.
   *
   * @param  string $str      The input string
   * @param  int    $words    Maximum number of words to keep
   * @param  string $strAddOn Suffix that is appended to a shortened string
   *
   * @return string An empty string for empty input or for $words < 1
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $words = (int)$words;

    if ($words <= 0) {
      return '';
    }

    // optional leading whitespace, then 1..$words groups of "word + whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    // no match at all -> hand the input back untouched
    if (!isset($matches[0])) {
      return $str;
    }

    $head = $matches[0];

    // the match already covers the whole string -> nothing was cut off
    if (self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
6749
6750
  /**
   * Wraps a string to a given number of characters
   *
   * The string is translated into a single-byte "mask" (one placeholder per
   * character), the mask is wrapped by the native "wordwrap()" and the break
   * positions found in the wrapped mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>
   *                      The input string.
   *                      </p>
   * @param int    $width [optional] <p>
   *                      The column width.
   *                      </p>
   * @param string $break [optional] <p>
   *                      The line is broken using the optional
   *                      break parameter.
   *                      </p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      (See second example).
   *                      </p>
   *
   * @return string the given string wrapped at the specified column;
   *                an empty string if $str or $break is empty.
   * @since 4.0.2
   * @since 5.0
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ('' === $str || '' === $break) {
      return '';
    }

    $segments = explode($break, $str);

    if (1 === count($segments) && '' === $segments[0]) {
      return '';
    }

    // Build the list of real characters ($glyphs) and, in parallel, the mask:
    // ' ' for a space, '?' for every other character and '#' for a break that
    // already exists in the input (stored as one $glyphs entry).
    $mask = '';
    $glyphs = array();

    foreach ($segments as $index => $segment) {
      if ($index > 0) {
        $glyphs[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $mask .= (' ' === $glyph) ? ' ' : '?';
      }
    }

    // Let the native function decide where the lines have to be broken.
    $wrapped = wordwrap($mask, $width, '#', $cut);

    $result = '';
    $cursor = 0;    // index of the next not yet consumed entry of $glyphs
    $maskPos = -1;  // position inside the wrapped mask
    $breakPos = -1; // position of the current '#' inside the wrapped mask

    while (false !== $breakPos = self::strpos($wrapped, '#', $breakPos + 1)) {
      // copy everything in front of the current break
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $result .= $glyphs[$cursor];
        unset($glyphs[$cursor]);
        ++$cursor;
      }

      // A break that stands in for a space / an existing break consumes that
      // entry; otherwise the next entry stays in place for the following line.
      if ($break === $glyphs[$cursor] || ' ' === $glyphs[$cursor]) {
        unset($glyphs[$cursor]);
        ++$cursor;
      }

      $result .= $break;
    }

    // whatever is left belongs to the last line
    return $result . implode('', $glyphs);
  }
6831
6832
  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return   array An array with numeric code point as key and White Space Character as value
   *                 (the content of self::$whitespace).
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6841
6842
}
6843