Completed
Push — master ( c15e3f...f989ff )
by Lars
07:10
created

UTF8::is_html()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 19
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 3

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 19
ccs 5
cts 5
cp 1
rs 9.4285
cc 3
eloc 10
nc 3
nop 1
crap 3
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  protected static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
93
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
94
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
96
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  protected static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    // HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  protected static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  protected static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
232
   * @var array
233
   */
234
  protected static $brokenUtf8ToUtf8 = array(
235
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
236
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
237
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
238
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
239
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
240
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
241
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
242
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
243
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
244
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
245
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
246
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
247
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
248
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
249
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
250
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
251
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
252
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
253
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
254
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
255
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
256
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
257
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
258
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
259
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
260
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
261
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
262
      'ü'       => 'ü',
263
      'ä'       => 'ä',
264
      'ö'       => 'ö',
265
      'Ö'       => 'Ö',
266
      'ß'       => 'ß',
267
      'Ã '       => 'à',
268
      'á'       => 'á',
269
      'â'       => 'â',
270
      'ã'       => 'ã',
271
      'ù'       => 'ù',
272
      'ú'       => 'ú',
273
      'û'       => 'û',
274
      'Ù'       => 'Ù',
275
      'Ú'       => 'Ú',
276
      'Û'       => 'Û',
277
      'Ü'       => 'Ü',
278
      'ò'       => 'ò',
279
      'ó'       => 'ó',
280
      'ô'       => 'ô',
281
      'è'       => 'è',
282
      'é'       => 'é',
283
      'ê'       => 'ê',
284
      'ë'       => 'ë',
285
      'À'       => 'À',
286
      'Á'       => 'Á',
287
      'Â'       => 'Â',
288
      'Ã'       => 'Ã',
289
      'Ä'       => 'Ä',
290
      'Ã…'       => 'Å',
291
      'Ç'       => 'Ç',
292
      'È'       => 'È',
293
      'É'       => 'É',
294
      'Ê'       => 'Ê',
295
      'Ë'       => 'Ë',
296
      'ÃŒ'       => 'Ì',
297
      'Í'       => 'Í',
298
      'ÃŽ'       => 'Î',
299
      'Ï'       => 'Ï',
300
      'Ñ'       => 'Ñ',
301
      'Ã’'       => 'Ò',
302
      'Ó'       => 'Ó',
303
      'Ô'       => 'Ô',
304
      'Õ'       => 'Õ',
305
      'Ø'       => 'Ø',
306
      'Ã¥'       => 'å',
307
      'æ'       => 'æ',
308
      'ç'       => 'ç',
309
      'ì'       => 'ì',
310
      'í'       => 'í',
311
      'î'       => 'î',
312
      'ï'       => 'ï',
313
      'ð'       => 'ð',
314
      'ñ'       => 'ñ',
315
      'õ'       => 'õ',
316
      'ø'       => 'ø',
317
      'ý'       => 'ý',
318
      'ÿ'       => 'ÿ',
319
      '€'      => '€',
320
  );
321
322
  /**
323
   * @var array
324
   */
325
  protected static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  protected static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  protected static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790 1
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792 1
      'ISO-IR-230',
793 1
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803 1
804
  /**
   * Constructor.
   *
   * Runs self::checkForSupport() every time an instance is created and does
   * nothing else.
   *
   * NOTE(review): checkForSupport() is defined elsewhere in this class;
   * presumably it detects which PHP extensions are available — confirm
   * against its definition.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
811
812
  /**
   * Return the character at the specified position: $str[1] like functionality.
   *
   * Delegates to self::substr() with a fixed length of 1, so the position is
   * interpreted exactly the way self::substr() interprets its offset.
   *
   * NOTE(review): the result for an out-of-range position is whatever
   * self::substr() returns in that case — confirm against its definition.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos The position of character to return.
   *
   * @return   string Single Multi-Byte character.
   */
  public static function access($str, $pos)
  {
    return self::substr($str, $pos, 1);
  }
824
825
  /**
   * Prepends the UTF-8 BOM to the string and returns the whole string.
   *
   * INFO: The input is returned unchanged unless self::string_has_bom()
   *       reports exactly false for it.
   *
   * @param    string $str The input string
   *
   * @return   string The output string that contains BOM
   */
  public static function add_bom_to_string($str)
  {
    // Anything other than a strict "false" from the check means: leave the input alone.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
842
843
  /**
   * Returns the UTF-8 Byte Order Mark.
   *
   * The value is the three-byte sequence 0xEF 0xBB 0xBF.
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    return "\xEF\xBB\xBF";
  }
852
853
  /**
   * Alias of UTF8::chr_map().
   *
   * Both arguments are forwarded unchanged, in the same order, and the
   * result of UTF8::chr_map() is returned as-is.
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    return self::chr_map($callback, $str);
  }
865
866
  /**
867
   * Returns an array of all lower and upper case UTF-8 encoded characters.
868
   *
869
   * @return   array An array with lower case chars as keys and upper chars as values.
870
   */
871
  protected static function case_table()
872
  {
873
    static $case = array(
874
875
      // lower => upper
876
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
877
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
878
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
879
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
880
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
881
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
882
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
883
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
884
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
885
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
886
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
887
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
888
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
889
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
890
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
891
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
892
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
893
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
894
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
895
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
896
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
897
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
898
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
899
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
900
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
901
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
902
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
903
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
904
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
905
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
906
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
907
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
908
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
909
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
910
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
911
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
912
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
913
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
914
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
915
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
916
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
917
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
918
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
919
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
920
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
921
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
922
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
923
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
924
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
925
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
926
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
927
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
928
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
929
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
930
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
931
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
932
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
933
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
934
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
935
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
936
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
937
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
938
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
939
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
940
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
941
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
942
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
943
      "\xea\x9e\x87"     => "\xea\x9e\x86",
944
      "\xea\x9e\x85"     => "\xea\x9e\x84",
945
      "\xea\x9e\x83"     => "\xea\x9e\x82",
946
      "\xea\x9e\x81"     => "\xea\x9e\x80",
947
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
948
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
949
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
950
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
951
      "\xea\x9d\xad"     => "\xea\x9d\xac",
952
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
953
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
954
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
955
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
956
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
957
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
958
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
959
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
960
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
961
      "\xea\x9d\x99"     => "\xea\x9d\x98",
962
      "\xea\x9d\x97"     => "\xea\x9d\x96",
963
      "\xea\x9d\x95"     => "\xea\x9d\x94",
964
      "\xea\x9d\x93"     => "\xea\x9d\x92",
965
      "\xea\x9d\x91"     => "\xea\x9d\x90",
966
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
967
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
968
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
969
      "\xea\x9d\x89"     => "\xea\x9d\x88",
970
      "\xea\x9d\x87"     => "\xea\x9d\x86",
971
      "\xea\x9d\x85"     => "\xea\x9d\x84",
972
      "\xea\x9d\x83"     => "\xea\x9d\x82",
973
      "\xea\x9d\x81"     => "\xea\x9d\x80",
974
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
975
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
976
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
977
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
978
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
979
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
980
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
981
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
982
      "\xea\x9c\xad"     => "\xea\x9c\xac",
983
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
984
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
985
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
986
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
987
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
988
      "\xea\x9a\x97"     => "\xea\x9a\x96",
989
      "\xea\x9a\x95"     => "\xea\x9a\x94",
990
      "\xea\x9a\x93"     => "\xea\x9a\x92",
991
      "\xea\x9a\x91"     => "\xea\x9a\x90",
992
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
993
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
994
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
995
      "\xea\x9a\x89"     => "\xea\x9a\x88",
996
      "\xea\x9a\x87"     => "\xea\x9a\x86",
997
      "\xea\x9a\x85"     => "\xea\x9a\x84",
998
      "\xea\x9a\x83"     => "\xea\x9a\x82",
999
      "\xea\x9a\x81"     => "\xea\x9a\x80",
1000
      "\xea\x99\xad"     => "\xea\x99\xac",
1001
      "\xea\x99\xab"     => "\xea\x99\xaa",
1002
      "\xea\x99\xa9"     => "\xea\x99\xa8",
1003
      "\xea\x99\xa7"     => "\xea\x99\xa6",
1004
      "\xea\x99\xa5"     => "\xea\x99\xa4",
1005
      "\xea\x99\xa3"     => "\xea\x99\xa2",
1006
      "\xea\x99\x9f"     => "\xea\x99\x9e",
1007
      "\xea\x99\x9d"     => "\xea\x99\x9c",
1008
      "\xea\x99\x9b"     => "\xea\x99\x9a",
1009
      "\xea\x99\x99"     => "\xea\x99\x98",
1010
      "\xea\x99\x97"     => "\xea\x99\x96",
1011
      "\xea\x99\x95"     => "\xea\x99\x94",
1012
      "\xea\x99\x93"     => "\xea\x99\x92",
1013
      "\xea\x99\x91"     => "\xea\x99\x90",
1014
      "\xea\x99\x8f"     => "\xea\x99\x8e",
1015
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1016
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1017
      "\xea\x99\x89"     => "\xea\x99\x88",
1018
      "\xea\x99\x87"     => "\xea\x99\x86",
1019
      "\xea\x99\x85"     => "\xea\x99\x84",
1020
      "\xea\x99\x83"     => "\xea\x99\x82",
1021
      "\xea\x99\x81"     => "\xea\x99\x80",
1022
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1023
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1024
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1025
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1026
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1027
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1028
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1029
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1030
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1031
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1032
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1033
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1034
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1035
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1036
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1037
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1038
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1039
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1040
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1041
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1042
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1043
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1044
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1045
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1046
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1047
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1048
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1049
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1050
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1051
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1052
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1053
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1054
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1055
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1056
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1057
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1058
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1059
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1060
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1061
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1062
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1063
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1064
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1065
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1066
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1067
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1068
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1069
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1070
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1071
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1072
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1073
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1074
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1075
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1076
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1077
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1078
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1079
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1080
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1081
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1082
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1083
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1084
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1085
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1086
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1087
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1088
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1089
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1090
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1091
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1092
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1093
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1094
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1095
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1096
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1097
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1098
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1099
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1100
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1101
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1102
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1103
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1104
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1105
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1106
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1107
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1108
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1109
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1110
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1111
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1112
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1113
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1114
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1115
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1116
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1117
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1118
      "\xe2\xb1\xa5"     => "\xc8\xba",
1119
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1120
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1121
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1122
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1123
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1124
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1125
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1126
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1127
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1128
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1129
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1130
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1131
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1132
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1133
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1134
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1135
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1136
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1137
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1138
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1139
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1140
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1141
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1142
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1143
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1144
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1145
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1146
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1147
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1148
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1149
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1150
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1151
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1152
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1153
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1154
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1155
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1156
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1157
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1158
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1159
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1160
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1161
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1162
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1163
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1164
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1165
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1166
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1167
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1168
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1169
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1170
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1171
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1172
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1173
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1174
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1175
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1176
      "\xe1\xbe\xbe"     => "\xce\x99",
1177
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1178
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1179
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1180
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1181
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1182
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1183
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1184
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1185
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1186
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1187
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1188
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1189
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1190
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1191
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1192
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1193
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1194
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1195
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1196
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1197
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1198
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1199
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1200
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1201
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1202
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1203
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1204
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1205
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1206
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1207
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1208
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1209
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1210
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1211
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1212
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1213
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1214
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1215
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1216
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1217
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1218
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1219
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1220
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1221
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1222
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1223
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1224
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1225
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1226
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1227
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1228
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1229
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1230
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1231
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1232
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1233
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1234
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1235
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1236
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1237
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1238
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1239
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1240
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1241
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1242
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1243
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1244
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1245
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1246
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1247
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1248
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1249
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1250
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1251
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1252
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1253
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1254
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1255
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1256
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1257
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1258
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1259
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1260
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1261
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1262
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1263
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1264
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1265
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1266
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1267
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1268
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1269
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1270
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1271
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1272
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1273
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1274
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1275
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1276
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1277
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1278
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1279
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1280
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1281
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1282
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1283
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1284
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1285
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1286
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1287
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1288
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1289
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1290
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1291
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1292
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1293
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1294
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1295
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1296
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1297
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1298
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1299
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1300
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1301
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1302
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1303
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1304
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1305
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1306
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1307
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1308
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1309
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1310
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1311
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1312
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1313
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1314
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1315
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1316
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1317
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1318
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1319
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1320
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1321
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1322
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1323
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1324
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1325
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1326
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1327
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1328
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1329
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1330
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1331
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1332
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1333
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1334
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1335
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1336
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1337
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1338
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1339
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1340
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1341
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1342
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1343
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1344
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1345
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1346
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1347
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1348
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1349
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1350
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1351
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1352
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1353
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1354
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1355
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1356
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1357
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1358
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1359
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1360
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1361
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1362
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1363
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1364
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1365
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1366
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1367
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1368
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1369
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1370
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1371
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1372
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1373
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1374
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1375
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1376
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1377
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1378
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1379
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1380
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1381
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1382
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1383
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1384
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1385
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1386
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1387
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1388
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1389
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1390
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1391
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1392
      "\xd6\x86"         => "\xd5\x96",
1393
      "\xd6\x85"         => "\xd5\x95",
1394
      "\xd6\x84"         => "\xd5\x94",
1395
      "\xd6\x83"         => "\xd5\x93",
1396
      "\xd6\x82"         => "\xd5\x92",
1397
      "\xd6\x81"         => "\xd5\x91",
1398
      "\xd6\x80"         => "\xd5\x90",
1399
      "\xd5\xbf"         => "\xd5\x8f",
1400
      "\xd5\xbe"         => "\xd5\x8e",
1401
      "\xd5\xbd"         => "\xd5\x8d",
1402
      "\xd5\xbc"         => "\xd5\x8c",
1403
      "\xd5\xbb"         => "\xd5\x8b",
1404
      "\xd5\xba"         => "\xd5\x8a",
1405
      "\xd5\xb9"         => "\xd5\x89",
1406
      "\xd5\xb8"         => "\xd5\x88",
1407
      "\xd5\xb7"         => "\xd5\x87",
1408
      "\xd5\xb6"         => "\xd5\x86",
1409
      "\xd5\xb5"         => "\xd5\x85",
1410
      "\xd5\xb4"         => "\xd5\x84",
1411
      "\xd5\xb3"         => "\xd5\x83",
1412
      "\xd5\xb2"         => "\xd5\x82",
1413
      "\xd5\xb1"         => "\xd5\x81",
1414
      "\xd5\xb0"         => "\xd5\x80",
1415
      "\xd5\xaf"         => "\xd4\xbf",
1416
      "\xd5\xae"         => "\xd4\xbe",
1417
      "\xd5\xad"         => "\xd4\xbd",
1418
      "\xd5\xac"         => "\xd4\xbc",
1419
      "\xd5\xab"         => "\xd4\xbb",
1420
      "\xd5\xaa"         => "\xd4\xba",
1421
      "\xd5\xa9"         => "\xd4\xb9",
1422
      "\xd5\xa8"         => "\xd4\xb8",
1423
      "\xd5\xa7"         => "\xd4\xb7",
1424
      "\xd5\xa6"         => "\xd4\xb6",
1425
      "\xd5\xa5"         => "\xd4\xb5",
1426
      "\xd5\xa4"         => "\xd4\xb4",
1427
      "\xd5\xa3"         => "\xd4\xb3",
1428
      "\xd5\xa2"         => "\xd4\xb2",
1429
      "\xd5\xa1"         => "\xd4\xb1",
1430
      "\xd4\xa5"         => "\xd4\xa4",
1431
      "\xd4\xa3"         => "\xd4\xa2",
1432
      "\xd4\xa1"         => "\xd4\xa0",
1433
      "\xd4\x9f"         => "\xd4\x9e",
1434
      "\xd4\x9d"         => "\xd4\x9c",
1435
      "\xd4\x9b"         => "\xd4\x9a",
1436
      "\xd4\x99"         => "\xd4\x98",
1437
      "\xd4\x97"         => "\xd4\x96",
1438
      "\xd4\x95"         => "\xd4\x94",
1439
      "\xd4\x93"         => "\xd4\x92",
1440
      "\xd4\x91"         => "\xd4\x90",
1441
      "\xd4\x8f"         => "\xd4\x8e",
1442
      "\xd4\x8d"         => "\xd4\x8c",
1443
      "\xd4\x8b"         => "\xd4\x8a",
1444
      "\xd4\x89"         => "\xd4\x88",
1445
      "\xd4\x87"         => "\xd4\x86",
1446
      "\xd4\x85"         => "\xd4\x84",
1447
      "\xd4\x83"         => "\xd4\x82",
1448
      "\xd4\x81"         => "\xd4\x80",
1449
      "\xd3\xbf"         => "\xd3\xbe",
1450
      "\xd3\xbd"         => "\xd3\xbc",
1451
      "\xd3\xbb"         => "\xd3\xba",
1452
      "\xd3\xb9"         => "\xd3\xb8",
1453
      "\xd3\xb7"         => "\xd3\xb6",
1454
      "\xd3\xb5"         => "\xd3\xb4",
1455
      "\xd3\xb3"         => "\xd3\xb2",
1456
      "\xd3\xb1"         => "\xd3\xb0",
1457
      "\xd3\xaf"         => "\xd3\xae",
1458
      "\xd3\xad"         => "\xd3\xac",
1459
      "\xd3\xab"         => "\xd3\xaa",
1460
      "\xd3\xa9"         => "\xd3\xa8",
1461
      "\xd3\xa7"         => "\xd3\xa6",
1462
      "\xd3\xa5"         => "\xd3\xa4",
1463
      "\xd3\xa3"         => "\xd3\xa2",
1464
      "\xd3\xa1"         => "\xd3\xa0",
1465
      "\xd3\x9f"         => "\xd3\x9e",
1466
      "\xd3\x9d"         => "\xd3\x9c",
1467
      "\xd3\x9b"         => "\xd3\x9a",
1468
      "\xd3\x99"         => "\xd3\x98",
1469
      "\xd3\x97"         => "\xd3\x96",
1470
      "\xd3\x95"         => "\xd3\x94",
1471
      "\xd3\x93"         => "\xd3\x92",
1472
      "\xd3\x91"         => "\xd3\x90",
1473
      "\xd3\x8f"         => "\xd3\x80",
1474
      "\xd3\x8e"         => "\xd3\x8d",
1475
      "\xd3\x8c"         => "\xd3\x8b",
1476
      "\xd3\x8a"         => "\xd3\x89",
1477
      "\xd3\x88"         => "\xd3\x87",
1478
      "\xd3\x86"         => "\xd3\x85",
1479
      "\xd3\x84"         => "\xd3\x83",
1480
      "\xd3\x82"         => "\xd3\x81",
1481
      "\xd2\xbf"         => "\xd2\xbe",
1482
      "\xd2\xbd"         => "\xd2\xbc",
1483
      "\xd2\xbb"         => "\xd2\xba",
1484
      "\xd2\xb9"         => "\xd2\xb8",
1485
      "\xd2\xb7"         => "\xd2\xb6",
1486
      "\xd2\xb5"         => "\xd2\xb4",
1487
      "\xd2\xb3"         => "\xd2\xb2",
1488
      "\xd2\xb1"         => "\xd2\xb0",
1489
      "\xd2\xaf"         => "\xd2\xae",
1490
      "\xd2\xad"         => "\xd2\xac",
1491
      "\xd2\xab"         => "\xd2\xaa",
1492
      "\xd2\xa9"         => "\xd2\xa8",
1493
      "\xd2\xa7"         => "\xd2\xa6",
1494
      "\xd2\xa5"         => "\xd2\xa4",
1495
      "\xd2\xa3"         => "\xd2\xa2",
1496
      "\xd2\xa1"         => "\xd2\xa0",
1497
      "\xd2\x9f"         => "\xd2\x9e",
1498
      "\xd2\x9d"         => "\xd2\x9c",
1499
      "\xd2\x9b"         => "\xd2\x9a",
1500
      "\xd2\x99"         => "\xd2\x98",
1501
      "\xd2\x97"         => "\xd2\x96",
1502
      "\xd2\x95"         => "\xd2\x94",
1503
      "\xd2\x93"         => "\xd2\x92",
1504
      "\xd2\x91"         => "\xd2\x90",
1505
      "\xd2\x8f"         => "\xd2\x8e",
1506
      "\xd2\x8d"         => "\xd2\x8c",
1507
      "\xd2\x8b"         => "\xd2\x8a",
1508
      "\xd2\x81"         => "\xd2\x80",
1509
      "\xd1\xbf"         => "\xd1\xbe",
1510
      "\xd1\xbd"         => "\xd1\xbc",
1511
      "\xd1\xbb"         => "\xd1\xba",
1512
      "\xd1\xb9"         => "\xd1\xb8",
1513
      "\xd1\xb7"         => "\xd1\xb6",
1514
      "\xd1\xb5"         => "\xd1\xb4",
1515
      "\xd1\xb3"         => "\xd1\xb2",
1516
      "\xd1\xb1"         => "\xd1\xb0",
1517
      "\xd1\xaf"         => "\xd1\xae",
1518
      "\xd1\xad"         => "\xd1\xac",
1519
      "\xd1\xab"         => "\xd1\xaa",
1520
      "\xd1\xa9"         => "\xd1\xa8",
1521
      "\xd1\xa7"         => "\xd1\xa6",
1522
      "\xd1\xa5"         => "\xd1\xa4",
1523
      "\xd1\xa3"         => "\xd1\xa2",
1524
      "\xd1\xa1"         => "\xd1\xa0",
1525
      "\xd1\x9f"         => "\xd0\x8f",
1526
      "\xd1\x9e"         => "\xd0\x8e",
1527
      "\xd1\x9d"         => "\xd0\x8d",
1528
      "\xd1\x9c"         => "\xd0\x8c",
1529
      "\xd1\x9b"         => "\xd0\x8b",
1530
      "\xd1\x9a"         => "\xd0\x8a",
1531
      "\xd1\x99"         => "\xd0\x89",
1532
      "\xd1\x98"         => "\xd0\x88",
1533
      "\xd1\x97"         => "\xd0\x87",
1534
      "\xd1\x96"         => "\xd0\x86",
1535
      "\xd1\x95"         => "\xd0\x85",
1536
      "\xd1\x94"         => "\xd0\x84",
1537
      "\xd1\x93"         => "\xd0\x83",
1538
      "\xd1\x92"         => "\xd0\x82",
1539
      "\xd1\x91"         => "\xd0\x81",
1540
      "\xd1\x90"         => "\xd0\x80",
1541
      "\xd1\x8f"         => "\xd0\xaf",
1542
      "\xd1\x8e"         => "\xd0\xae",
1543
      "\xd1\x8d"         => "\xd0\xad",
1544
      "\xd1\x8c"         => "\xd0\xac",
1545
      "\xd1\x8b"         => "\xd0\xab",
1546
      "\xd1\x8a"         => "\xd0\xaa",
1547
      "\xd1\x89"         => "\xd0\xa9",
1548
      "\xd1\x88"         => "\xd0\xa8",
1549
      "\xd1\x87"         => "\xd0\xa7",
1550
      "\xd1\x86"         => "\xd0\xa6",
1551
      "\xd1\x85"         => "\xd0\xa5",
1552
      "\xd1\x84"         => "\xd0\xa4",
1553
      "\xd1\x83"         => "\xd0\xa3",
1554
      "\xd1\x82"         => "\xd0\xa2",
1555
      "\xd1\x81"         => "\xd0\xa1",
1556
      "\xd1\x80"         => "\xd0\xa0",
1557
      "\xd0\xbf"         => "\xd0\x9f",
1558
      "\xd0\xbe"         => "\xd0\x9e",
1559
      "\xd0\xbd"         => "\xd0\x9d",
1560
      "\xd0\xbc"         => "\xd0\x9c",
1561
      "\xd0\xbb"         => "\xd0\x9b",
1562
      "\xd0\xba"         => "\xd0\x9a",
1563
      "\xd0\xb9"         => "\xd0\x99",
1564
      "\xd0\xb8"         => "\xd0\x98",
1565
      "\xd0\xb7"         => "\xd0\x97",
1566
      "\xd0\xb6"         => "\xd0\x96",
1567
      "\xd0\xb5"         => "\xd0\x95",
1568
      "\xd0\xb4"         => "\xd0\x94",
1569
      "\xd0\xb3"         => "\xd0\x93",
1570
      "\xd0\xb2"         => "\xd0\x92",
1571
      "\xd0\xb1"         => "\xd0\x91",
1572
      "\xd0\xb0"         => "\xd0\x90",
1573
      "\xcf\xbb"         => "\xcf\xba",
1574
      "\xcf\xb8"         => "\xcf\xb7",
1575
      "\xcf\xb5"         => "\xce\x95",
1576
      "\xcf\xb2"         => "\xcf\xb9",
1577
      "\xcf\xb1"         => "\xce\xa1",
1578
      "\xcf\xb0"         => "\xce\x9a",
1579
      "\xcf\xaf"         => "\xcf\xae",
1580
      "\xcf\xad"         => "\xcf\xac",
1581
      "\xcf\xab"         => "\xcf\xaa",
1582
      "\xcf\xa9"         => "\xcf\xa8",
1583
      "\xcf\xa7"         => "\xcf\xa6",
1584
      "\xcf\xa5"         => "\xcf\xa4",
1585
      "\xcf\xa3"         => "\xcf\xa2",
1586
      "\xcf\xa1"         => "\xcf\xa0",
1587
      "\xcf\x9f"         => "\xcf\x9e",
1588
      "\xcf\x9d"         => "\xcf\x9c",
1589
      "\xcf\x9b"         => "\xcf\x9a",
1590
      "\xcf\x99"         => "\xcf\x98",
1591
      "\xcf\x97"         => "\xcf\x8f",
1592
      "\xcf\x96"         => "\xce\xa0",
1593
      "\xcf\x95"         => "\xce\xa6",
1594
      "\xcf\x91"         => "\xce\x98",
1595
      "\xcf\x90"         => "\xce\x92",
1596
      "\xcf\x8e"         => "\xce\x8f",
1597
      "\xcf\x8d"         => "\xce\x8e",
1598
      "\xcf\x8c"         => "\xce\x8c",
1599
      "\xcf\x8b"         => "\xce\xab",
1600
      "\xcf\x8a"         => "\xce\xaa",
1601
      "\xcf\x89"         => "\xce\xa9",
1602
      "\xcf\x88"         => "\xce\xa8",
1603
      "\xcf\x87"         => "\xce\xa7",
1604
      "\xcf\x86"         => "\xce\xa6",
1605
      "\xcf\x85"         => "\xce\xa5",
1606
      "\xcf\x84"         => "\xce\xa4",
1607
      "\xcf\x83"         => "\xce\xa3",
1608
      "\xcf\x82"         => "\xce\xa3",
1609
      "\xcf\x81"         => "\xce\xa1",
1610
      "\xcf\x80"         => "\xce\xa0",
1611
      "\xce\xbf"         => "\xce\x9f",
1612
      "\xce\xbe"         => "\xce\x9e",
1613
      "\xce\xbd"         => "\xce\x9d",
1614
      "\xce\xbc"         => "\xce\x9c",
1615
      "\xce\xbb"         => "\xce\x9b",
1616
      "\xce\xba"         => "\xce\x9a",
1617
      "\xce\xb9"         => "\xce\x99",
1618
      "\xce\xb8"         => "\xce\x98",
1619
      "\xce\xb7"         => "\xce\x97",
1620
      "\xce\xb6"         => "\xce\x96",
1621
      "\xce\xb5"         => "\xce\x95",
1622
      "\xce\xb4"         => "\xce\x94",
1623
      "\xce\xb3"         => "\xce\x93",
1624
      "\xce\xb2"         => "\xce\x92",
1625
      "\xce\xb1"         => "\xce\x91",
1626
      "\xce\xaf"         => "\xce\x8a",
1627
      "\xce\xae"         => "\xce\x89",
1628
      "\xce\xad"         => "\xce\x88",
1629
      "\xce\xac"         => "\xce\x86",
1630
      "\xcd\xbd"         => "\xcf\xbf",
1631
      "\xcd\xbc"         => "\xcf\xbe",
1632
      "\xcd\xbb"         => "\xcf\xbd",
1633
      "\xcd\xb7"         => "\xcd\xb6",
1634
      "\xcd\xb3"         => "\xcd\xb2",
1635
      "\xcd\xb1"         => "\xcd\xb0",
1636
      "\xca\x92"         => "\xc6\xb7",
1637
      "\xca\x8c"         => "\xc9\x85",
1638
      "\xca\x8b"         => "\xc6\xb2",
1639
      "\xca\x8a"         => "\xc6\xb1",
1640
      "\xca\x89"         => "\xc9\x84",
1641
      "\xca\x88"         => "\xc6\xae",
1642
      "\xca\x83"         => "\xc6\xa9",
1643
      "\xca\x80"         => "\xc6\xa6",
1644
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1645
      "\xc9\xb5"         => "\xc6\x9f",
1646
      "\xc9\xb2"         => "\xc6\x9d",
1647
      "\xc9\xb1"         => "\xe2\xb1\xae",
1648
      "\xc9\xaf"         => "\xc6\x9c",
1649
      "\xc9\xab"         => "\xe2\xb1\xa2",
1650
      "\xc9\xa9"         => "\xc6\x96",
1651
      "\xc9\xa8"         => "\xc6\x97",
1652
      "\xc9\xa5"         => "\xea\x9e\x8d",
1653
      "\xc9\xa3"         => "\xc6\x94",
1654
      "\xc9\xa0"         => "\xc6\x93",
1655
      "\xc9\x9b"         => "\xc6\x90",
1656
      "\xc9\x99"         => "\xc6\x8f",
1657
      "\xc9\x97"         => "\xc6\x8a",
1658
      "\xc9\x96"         => "\xc6\x89",
1659
      "\xc9\x94"         => "\xc6\x86",
1660
      "\xc9\x93"         => "\xc6\x81",
1661
      "\xc9\x92"         => "\xe2\xb1\xb0",
1662
      "\xc9\x91"         => "\xe2\xb1\xad",
1663
      "\xc9\x90"         => "\xe2\xb1\xaf",
1664
      "\xc9\x8f"         => "\xc9\x8e",
1665
      "\xc9\x8d"         => "\xc9\x8c",
1666
      "\xc9\x8b"         => "\xc9\x8a",
1667
      "\xc9\x89"         => "\xc9\x88",
1668
      "\xc9\x87"         => "\xc9\x86",
1669
      "\xc9\x82"         => "\xc9\x81",
1670
      "\xc9\x80"         => "\xe2\xb1\xbf",
1671
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1672
      "\xc8\xbc"         => "\xc8\xbb",
1673
      "\xc8\xb3"         => "\xc8\xb2",
1674
      "\xc8\xb1"         => "\xc8\xb0",
1675
      "\xc8\xaf"         => "\xc8\xae",
1676
      "\xc8\xad"         => "\xc8\xac",
1677
      "\xc8\xab"         => "\xc8\xaa",
1678
      "\xc8\xa9"         => "\xc8\xa8",
1679
      "\xc8\xa7"         => "\xc8\xa6",
1680
      "\xc8\xa5"         => "\xc8\xa4",
1681
      "\xc8\xa3"         => "\xc8\xa2",
1682
      "\xc8\x9f"         => "\xc8\x9e",
1683
      "\xc8\x9d"         => "\xc8\x9c",
1684
      "\xc8\x9b"         => "\xc8\x9a",
1685
      "\xc8\x99"         => "\xc8\x98",
1686
      "\xc8\x97"         => "\xc8\x96",
1687
      "\xc8\x95"         => "\xc8\x94",
1688
      "\xc8\x93"         => "\xc8\x92",
1689
      "\xc8\x91"         => "\xc8\x90",
1690
      "\xc8\x8f"         => "\xc8\x8e",
1691
      "\xc8\x8d"         => "\xc8\x8c",
1692
      "\xc8\x8b"         => "\xc8\x8a",
1693
      "\xc8\x89"         => "\xc8\x88",
1694
      "\xc8\x87"         => "\xc8\x86",
1695
      "\xc8\x85"         => "\xc8\x84",
1696
      "\xc8\x83"         => "\xc8\x82",
1697
      "\xc8\x81"         => "\xc8\x80",
1698
      "\xc7\xbf"         => "\xc7\xbe",
1699
      "\xc7\xbd"         => "\xc7\xbc",
1700
      "\xc7\xbb"         => "\xc7\xba",
1701
      "\xc7\xb9"         => "\xc7\xb8",
1702
      "\xc7\xb5"         => "\xc7\xb4",
1703
      "\xc7\xb3"         => "\xc7\xb2",
1704
      "\xc7\xaf"         => "\xc7\xae",
1705
      "\xc7\xad"         => "\xc7\xac",
1706
      "\xc7\xab"         => "\xc7\xaa",
1707
      "\xc7\xa9"         => "\xc7\xa8",
1708
      "\xc7\xa7"         => "\xc7\xa6",
1709
      "\xc7\xa5"         => "\xc7\xa4",
1710
      "\xc7\xa3"         => "\xc7\xa2",
1711
      "\xc7\xa1"         => "\xc7\xa0",
1712
      "\xc7\x9f"         => "\xc7\x9e",
1713
      "\xc7\x9d"         => "\xc6\x8e",
1714
      "\xc7\x9c"         => "\xc7\x9b",
1715
      "\xc7\x9a"         => "\xc7\x99",
1716
      "\xc7\x98"         => "\xc7\x97",
1717
      "\xc7\x96"         => "\xc7\x95",
1718
      "\xc7\x94"         => "\xc7\x93",
1719
      "\xc7\x92"         => "\xc7\x91",
1720
      "\xc7\x90"         => "\xc7\x8f",
1721
      "\xc7\x8e"         => "\xc7\x8d",
1722
      "\xc7\x8c"         => "\xc7\x8b",
1723
      "\xc7\x89"         => "\xc7\x88",
1724
      "\xc7\x86"         => "\xc7\x85",
1725
      "\xc6\xbf"         => "\xc7\xb7",
1726
      "\xc6\xbd"         => "\xc6\xbc",
1727
      "\xc6\xb9"         => "\xc6\xb8",
1728
      "\xc6\xb6"         => "\xc6\xb5",
1729
      "\xc6\xb4"         => "\xc6\xb3",
1730
      "\xc6\xb0"         => "\xc6\xaf",
1731
      "\xc6\xad"         => "\xc6\xac",
1732
      "\xc6\xa8"         => "\xc6\xa7",
1733
      "\xc6\xa5"         => "\xc6\xa4",
1734
      "\xc6\xa3"         => "\xc6\xa2",
1735
      "\xc6\xa1"         => "\xc6\xa0",
1736
      "\xc6\x9e"         => "\xc8\xa0",
1737
      "\xc6\x9a"         => "\xc8\xbd",
1738
      "\xc6\x99"         => "\xc6\x98",
1739
      "\xc6\x95"         => "\xc7\xb6",
1740
      "\xc6\x92"         => "\xc6\x91",
1741
      "\xc6\x8c"         => "\xc6\x8b",
1742
      "\xc6\x88"         => "\xc6\x87",
1743
      "\xc6\x85"         => "\xc6\x84",
1744
      "\xc6\x83"         => "\xc6\x82",
1745
      "\xc6\x80"         => "\xc9\x83",
1746
      "\xc5\xbf"         => "\x53",
1747
      "\xc5\xbe"         => "\xc5\xbd",
1748
      "\xc5\xbc"         => "\xc5\xbb",
1749
      "\xc5\xba"         => "\xc5\xb9",
1750
      "\xc5\xb7"         => "\xc5\xb6",
1751
      "\xc5\xb5"         => "\xc5\xb4",
1752
      "\xc5\xb3"         => "\xc5\xb2",
1753
      "\xc5\xb1"         => "\xc5\xb0",
1754
      "\xc5\xaf"         => "\xc5\xae",
1755
      "\xc5\xad"         => "\xc5\xac",
1756
      "\xc5\xab"         => "\xc5\xaa",
1757
      "\xc5\xa9"         => "\xc5\xa8",
1758
      "\xc5\xa7"         => "\xc5\xa6",
1759
      "\xc5\xa5"         => "\xc5\xa4",
1760
      "\xc5\xa3"         => "\xc5\xa2",
1761
      "\xc5\xa1"         => "\xc5\xa0",
1762
      "\xc5\x9f"         => "\xc5\x9e",
1763
      "\xc5\x9d"         => "\xc5\x9c",
1764
      "\xc5\x9b"         => "\xc5\x9a",
1765
      "\xc5\x99"         => "\xc5\x98",
1766
      "\xc5\x97"         => "\xc5\x96",
1767
      "\xc5\x95"         => "\xc5\x94",
1768
      "\xc5\x93"         => "\xc5\x92",
1769
      "\xc5\x91"         => "\xc5\x90",
1770
      "\xc5\x8f"         => "\xc5\x8e",
1771
      "\xc5\x8d"         => "\xc5\x8c",
1772
      "\xc5\x8b"         => "\xc5\x8a",
1773
      "\xc5\x88"         => "\xc5\x87",
1774
      "\xc5\x86"         => "\xc5\x85",
1775
      "\xc5\x84"         => "\xc5\x83",
1776
      "\xc5\x82"         => "\xc5\x81",
1777
      "\xc5\x80"         => "\xc4\xbf",
1778
      "\xc4\xbe"         => "\xc4\xbd",
1779
      "\xc4\xbc"         => "\xc4\xbb",
1780
      "\xc4\xba"         => "\xc4\xb9",
1781
      "\xc4\xb7"         => "\xc4\xb6",
1782
      "\xc4\xb5"         => "\xc4\xb4",
1783
      "\xc4\xb3"         => "\xc4\xb2",
1784
      "\xc4\xb1"         => "\x49",
1785
      "\xc4\xaf"         => "\xc4\xae",
1786
      "\xc4\xad"         => "\xc4\xac",
1787
      "\xc4\xab"         => "\xc4\xaa",
1788
      "\xc4\xa9"         => "\xc4\xa8",
1789
      "\xc4\xa7"         => "\xc4\xa6",
1790
      "\xc4\xa5"         => "\xc4\xa4",
1791
      "\xc4\xa3"         => "\xc4\xa2",
1792
      "\xc4\xa1"         => "\xc4\xa0",
1793
      "\xc4\x9f"         => "\xc4\x9e",
1794
      "\xc4\x9d"         => "\xc4\x9c",
1795
      "\xc4\x9b"         => "\xc4\x9a",
1796
      "\xc4\x99"         => "\xc4\x98",
1797
      "\xc4\x97"         => "\xc4\x96",
1798
      "\xc4\x95"         => "\xc4\x94",
1799
      "\xc4\x93"         => "\xc4\x92",
1800
      "\xc4\x91"         => "\xc4\x90",
1801
      "\xc4\x8f"         => "\xc4\x8e",
1802
      "\xc4\x8d"         => "\xc4\x8c",
1803
      "\xc4\x8b"         => "\xc4\x8a",
1804
      "\xc4\x89"         => "\xc4\x88",
1805
      "\xc4\x87"         => "\xc4\x86",
1806
      "\xc4\x85"         => "\xc4\x84",
1807
      "\xc4\x83"         => "\xc4\x82",
1808
      "\xc4\x81"         => "\xc4\x80",
1809
      "\xc3\xbf"         => "\xc5\xb8",
1810
      "\xc3\xbe"         => "\xc3\x9e",
1811
      "\xc3\xbd"         => "\xc3\x9d",
1812
      "\xc3\xbc"         => "\xc3\x9c",
1813
      "\xc3\xbb"         => "\xc3\x9b",
1814
      "\xc3\xba"         => "\xc3\x9a",
1815
      "\xc3\xb9"         => "\xc3\x99",
1816
      "\xc3\xb8"         => "\xc3\x98",
1817
      "\xc3\xb6"         => "\xc3\x96",
1818
      "\xc3\xb5"         => "\xc3\x95",
1819
      "\xc3\xb4"         => "\xc3\x94",
1820
      "\xc3\xb3"         => "\xc3\x93",
1821
      "\xc3\xb2"         => "\xc3\x92",
1822
      "\xc3\xb1"         => "\xc3\x91",
1823
      "\xc3\xb0"         => "\xc3\x90",
1824
      "\xc3\xaf"         => "\xc3\x8f",
1825
      "\xc3\xae"         => "\xc3\x8e",
1826
      "\xc3\xad"         => "\xc3\x8d",
1827
      "\xc3\xac"         => "\xc3\x8c",
1828
      "\xc3\xab"         => "\xc3\x8b",
1829
      "\xc3\xaa"         => "\xc3\x8a",
1830
      "\xc3\xa9"         => "\xc3\x89",
1831
      "\xc3\xa8"         => "\xc3\x88",
1832
      "\xc3\xa7"         => "\xc3\x87",
1833
      "\xc3\xa6"         => "\xc3\x86",
1834
      "\xc3\xa5"         => "\xc3\x85",
1835
      "\xc3\xa4"         => "\xc3\x84",
1836
      "\xc3\xa3"         => "\xc3\x83",
1837
      "\xc3\xa2"         => "\xc3\x82",
1838
      "\xc3\xa1"         => "\xc3\x81",
1839
      "\xc3\xa0"         => "\xc3\x80",
1840
      "\xc2\xb5"         => "\xce\x9c",
1841
      "\x7a"             => "\x5a",
1842
      "\x79"             => "\x59",
1843
      "\x78"             => "\x58",
1844
      "\x77"             => "\x57",
1845
      "\x76"             => "\x56",
1846
      "\x75"             => "\x55",
1847
      "\x74"             => "\x54",
1848
      "\x73"             => "\x53",
1849
      "\x72"             => "\x52",
1850
      "\x71"             => "\x51",
1851
      "\x70"             => "\x50",
1852
      "\x6f"             => "\x4f",
1853
      "\x6e"             => "\x4e",
1854
      "\x6d"             => "\x4d",
1855
      "\x6c"             => "\x4c",
1856
      "\x6b"             => "\x4b",
1857
      "\x6a"             => "\x4a",
1858
      "\x69"             => "\x49",
1859
      "\x68"             => "\x48",
1860
      "\x67"             => "\x47",
1861 157
      "\x66"             => "\x46",
1862
      "\x65"             => "\x45",
1863 157
      "\x64"             => "\x44",
1864
      "\x63"             => "\x43",
1865 1
      "\x62"             => "\x42",
1866 1
      "\x61"             => "\x41",
1867 1
1868 1
    );
1869 1
1870 157
    return $case;
1871
  }
1872
1873
  /**
   * Detects which UTF-8 related PHP features are available and stores the
   * results in self::$support.
   *
   * The detection is skipped when the "mbstring" entry is already set, so
   * repeated calls are cheap.
   *
   * INFO: There is no need to call this manually; methods that depend on the
   *       result trigger the detection themselves.
   */
  public static function checkForSupport()
  {
    // the "mbstring" entry doubles as the "already detected" marker
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1889
1890 8
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * The input is normalised to an integer before any encoder is used, so the
   * result no longer depends on whether the "intl" extension is installed:
   * non-integer input is handed to self::hex_to_int() first (presumably for
   * notations such as "U+00e4" -- verify against hex_to_int()).
   *
   * @param    int|string $code_point The code point for which to generate a character.
   *
   * @return   string Multi-Byte character, returns empty string on failure to encode
   *                  (this includes a code point that resolves to 0).
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    $i = (int)$code_point;

    if ($i !== $code_point) {
      // not a plain integer: let hex_to_int() interpret the value
      $i = (int)self::hex_to_int($code_point);
    }

    if (!$i) {
      return '';
    }

    if (self::$support['intlChar'] === true) {
      // IntlChar::chr() reports failure with null; honour the documented
      // "empty string on failure" contract instead of leaking the null.
      $char = \IntlChar::chr($i);

      return $char === null ? '' : $char;
    }

    return self::html_entity_decode("&#{$i};", ENT_QUOTES);
  }
1917
1918
  /**
   * Runs a callback on every character of a string and collects the results.
   *
   * The string is broken up with self::split() and the resulting pieces are
   * passed to the callback one by one.
   *
   * @param  string|array $callback The callback function.
   * @param  string       $str      UTF-8 string to run callback on.
   *
   * @return array The return values of the callback, one entry per character.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1932
1933
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character;
   *                 an empty array for an empty string.
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Test for "no bytes at all" rather than for falsiness: the string "0"
    // is falsy in PHP but still consists of one character.
    if (!isset($str[0])) {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1953
1954 1
  /**
   * Get a decimal code representation of a specific character.
   *
   * Only the first UTF-8 sequence of the input is decoded. The lead byte
   * determines how many bytes belong to the sequence; the payload bits of the
   * lead byte and of every continuation byte are concatenated, 6 bits per
   * continuation byte.
   *
   * @param   string $char The input character
   *
   * @return  int The decoded value; 0 for an empty string. Continuation bytes
   *              that are missing from a truncated sequence count as zero bits.
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // nothing to decode; also avoids reading an undefined string offset
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx -- a byte missing from a truncated sequence adds no bits
      $continuation = isset($char[$i]) ? (self::ord($char[$i]) & ~0x80) : 0;
      $code = ($code << 6) + $continuation;
    }

    return $code;
  }
1993 1
1994
  /**
   * Get the hexadecimal code point notation of a UTF-8 encoded character.
   *
   * The character is converted with self::ord() and the result is formatted
   * by self::int_to_hex() using the given prefix.
   *
   * @param    string $char The input character
   * @param    string $pfix Prefix that is handed to self::int_to_hex(), "U+" by default.
   *
   * @return   string The code point encoded as U+xxxx
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
2006
2007
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from self::split() and are glued together with implode(),
   * so $end is placed between the chunks only -- nothing is appended after the
   * last chunk.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
2020
2021
  /**
   * Removes every byte that is not part of a structurally valid UTF-8 sequence
   * and optionally applies further normalisation steps.
   *
   * Order of operations:
   * 1. strip stray bytes with a byte-level regular expression,
   * 2. self::replace_diamond_question_mark() with an empty replacement,
   * 3. self::remove_invisible_characters(),
   * 4. the optional steps selected by the flags below.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to run self::removeBOM() on the result
   * @param bool   $normalize_whitespace    set true, to run self::normalize_whitespace()
   * @param bool   $normalize_msword        set true, to run self::normalize_msword(),
   *                                        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures a run of well-formed sequences, the other two groups
    // match a single stray byte. Replacing every match with group 1 keeps the
    // well-formed runs and deletes the stray bytes.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($pattern, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    // optional steps, each one only for a strict boolean true
    if (true === $normalize_whitespace) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $str = self::normalize_msword($str);
    }

    if (true === $remove_bom) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
2067
2068 3
  /**
   * Cleans up a string: repairs simple encoding errors and removes everything
   * that is not valid UTF-8.
   *
   * The input is first passed through self::fix_simple_utf8() and then through
   * self::clean() with BOM removal and whitespace normalisation switched on,
   * MS Word normalisation switched off and non-breaking spaces preserved.
   *
   * @param string $str
   *
   * @return string The cleaned string; an empty string for empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors first ...
    $fixed = self::fix_simple_utf8($str);

    // ... then strip non UTF-8 bytes; arguments after the string are:
    // remove BOM, normalize whitespace, no MS Word normalisation, keep non-breaking spaces
    return (string)self::clean($fixed, true, true, false, true);
  }
2095
2096 3
  /**
   * Converts a string, or an array of strings, into an array of Unicode code points.
   *
   * A string argument is split into characters with self::split(); an array
   * argument is used as given. Every element is then mapped through
   * self::ord() and, when requested, additionally through self::int_to_hex().
   *
   * @param    string|string[] $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool            $u_style If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $codes = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $codes;
    }

    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $codes);
  }
2131
2132
  /**
   * Counts how often each character occurs in a string.
   *
   * The string is split with self::split() and the pieces are tallied with
   * array_count_values().
   *
   * @param    string $str The input string.
   *
   * @return   array An associative array of Character as keys and
   *           their count as values.
   */
  public static function count_chars($str)
  {
    $chars = self::split($str);

    return array_count_values($chars);
  }
2144
2145
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity ("&#x...;") which is then
   * decoded to UTF-8 by mb_convert_encoding().
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2162
2163 1
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix
   *        broken / double encoding, so it can be called on a string that is already UTF-8
   *        without messing it up.
   *
   * The string is returned unchanged when it or the target encoding is empty, when the
   * current encoding cannot be detected, or when $force is not true and the detected
   * encoding already equals the target encoding.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
   *                         otherwise we auto-detect the current string-encoding
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to convert, or no target encoding given
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // A falsy detection result (false as well as an empty string) is treated
    // as "encoding unknown": the input is left untouched.
    if (!$encodingDetected) {
      return $str;
    }

    // without $force, a string that is already in the target encoding is kept as-is
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    self::checkForSupport();

    if ($encoding === 'UTF-8') {
      $viaToUtf8 = $force === true
                   || $encodingDetected === 'UTF-8'
                   || $encodingDetected === 'WINDOWS-1252'
                   || $encodingDetected === 'ISO-8859-1';

      if ($viaToUtf8) {
        return self::to_utf8($str);
      }
    } elseif ($encoding === 'ISO-8859-1') {
      $viaToWin1252 = $force === true
                      || $encodingDetected === 'ISO-8859-1'
                      || $encodingDetected === 'UTF-8';

      if ($viaToWin1252) {
        return self::to_win1252($str);
      }
    }

    // generic conversion for every other combination
    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // fall back to the original string when the conversion yields a falsy value
    return $strEncoded ? $strEncoded : $str;
  }
2237
2238
  /**
   * Callback for preg_replace_callback(): decodes one matched HTML entity to UTF-8.
   *
   * A match that decodes to a single quote is returned as the entity "&#x27;"
   * instead of the literal character.
   *
   * @internal used for "UTF8::html_entity_decode()"
   *
   * @param  array $matches PREG matches; only the full match ($matches[0]) is used
   *
   * @return string
   */
  protected static function html_entity_decode_callback($matches)
  {
    self::checkForSupport();

    $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    // single quotes stay encoded
    return $decoded === "'" ? '&#x27;' : $decoded;
  }
2259 2
2260
  /**
   * Reads entire file into a string and (optionally) converts it to clean UTF-8.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      Name of the file to read. The value is passed through
   *                                     filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   * @param int|bool|null $flags         [optional] Handed to the native function as its
   *                                     "use include path" switch (e.g. FILE_USE_INCLUDE_PATH);
   *                                     null means "do not search the include path".
   * @param resource|null $context       [optional] A valid context resource created with
   *                                     stream_context_create(). When null and $timeout is
   *                                     non-zero, a context with that HTTP timeout is created.
   * @param int|null      $offset        [optional] The offset where the reading starts;
   *                                     null means "start at the beginning".
   * @param int|null      $maxlen        [optional] Maximum length of data read. Only honoured when
   *                                     it is an integer; otherwise the file is read until the end.
   * @param int           $timeout       HTTP timeout in seconds for the automatically created
   *                                     context; 0 disables the automatic context.
   * @param boolean       $convertToUtf8 WARNING: maybe you can't use this option for images or pdf,
   *                                     because they used non default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;

    // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, so a
    // path that contains such characters is altered here; the filter is also
    // deprecated as of PHP 8.1 -- confirm whether this sanitizing is intended.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function declares non-nullable "use include path" and
    // "offset" parameters. Map our null defaults to the values PHP would
    // coerce them to, instead of relying on the implicit coercion (which is
    // deprecated for internal functions since PHP 8.1).
    if ($flags === null) {
      $flags = false;
    }

    if ($offset === null) {
      $offset = 0;
    }

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // convert only when the detected encoding differs, then strip invalid bytes
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
2380
2381
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param    string $file_path Path to a valid file.
   *
   * @return   bool True if the file has BOM at the start, False otherwise
   *                (also when the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    $contents = file_get_contents($file_path);

    // An unreadable file yields false; do not hand that on to the string check.
    if ($contents === false) {
      return false;
    }

    return self::string_has_bom($contents);
  }
2392
2393
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are walked recursively; strings are normalized; every
   * other type is returned untouched.
   *
   * @param mixed  $var                The value to normalize.
   * @param int    $normalization_form Normalization form handed to "\Normalizer".
   * @param string $leading_combining  Prefix put in front of a string that starts with a combining mark.
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    switch (gettype($var)) {
      case 'array':
        foreach ($var as $key => $value) {
          /** @noinspection AlterInForeachInspection */
          $var[$key] = self::filter($value, $normalization_form, $leading_combining);
        }

        return $var;

      case 'object':
        foreach ($var as $key => $value) {
          $var->$key = self::filter($value, $normalization_form, $leading_combining);
        }

        return $var;

      case 'string':
        if (strpos($var, "\r") !== false) {
          // Workaround https://bugs.php.net/65732
          $var = str_replace(array("\r\n", "\r"), "\n", $var);
        }

        // Pure ASCII needs no normalization.
        if (!preg_match('/[\x80-\xFF]/', $var)) {
          return $var;
        }

        // '-' is a non-empty marker for "the input was already normalized".
        $n = '-';
        if (!\Normalizer::isNormalized($var, $normalization_form)) {
          $n = \Normalizer::normalize($var, $normalization_form);

          // No usable normalizer output: fall back to re-encoding the string.
          $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
        }

        if ($var[0] >= "\x80" && isset($n[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
          // Prevent leading combining chars
          // for NFC-safe concatenations.
          $var = $leading_combining . $var;
        }

        return $var;
    }

    return $var;
  }
2445 1
2446
  /**
   * "filter_input()"-wrapper that normalizes the result via "UTF8::filter()".
   *
   * The option argument is only forwarded to the native function when the
   * caller actually passed it.
   *
   * @param int    $type
   * @param string $var
   * @param int    $filter
   * @param mixed  $option
   *
   * @return mixed
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    $filtered = func_num_args() < 4
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($filtered);
  }
2466
2467
  /**
   * "filter_input_array()"-wrapper that normalizes the result via "UTF8::filter()".
   *
   * The definition and add-empty arguments are only forwarded to the native
   * function when the caller passed more than the type.
   *
   * @param int   $type
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    $filtered = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($filtered);
  }
2486
2487
  /**
   * "filter_var()"-wrapper that normalizes the result via "UTF8::filter()".
   *
   * The option argument is only forwarded to the native function when the
   * caller actually passed it.
   *
   * @param mixed $var
   * @param int   $filter
   * @param mixed $option
   *
   * @return mixed
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    $filtered = func_num_args() < 3
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($filtered);
  }
2506
2507 1
  /**
   * "filter_var_array()"-wrapper that normalizes the result via "UTF8::filter()".
   *
   * The definition and add-empty arguments are only forwarded to the native
   * function when the caller passed more than the data.
   *
   * @param array $data
   * @param mixed $definition
   * @param bool  $add_empty
   *
   * @return mixed
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    $filtered = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($filtered);
  }
2526
2527 1
  /**
   * Check that a string does not hold more unicode characters than allowed.
   *
   * @param    string $str      The original string to be checked.
   * @param    int    $box_size The maximum number of chars the string may have.
   *
   * @return   bool true if the string length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
2539 1
2540 1
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * Every key of the "$brokenUtf8ToUtf8" table found in the input is replaced by its value.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * @param string $str
   *
   * @return string
   */
  public static function fix_simple_utf8($str)
  {
    // Search / replace lists are built once and reused on later calls.
    static $lookup = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($lookup === null) {
      $lookup = array(
          'search'  => array_keys(self::$brokenUtf8ToUtf8),
          'replace' => array_values(self::$brokenUtf8ToUtf8),
      );
    }

    return str_replace($lookup['search'], $lookup['replace'], $str);
  }
2567
2568
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * The value is decoded and re-encoded repeatedly until a pass no longer changes it.
   *
   * @param string|string[] $str You can use a string or an array of strings.
   *
   * @return mixed
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      // Fix every element on its own, keeping the keys.
      foreach (array_keys($str) as $key) {
        $str[$key] = self::fix_utf8($str[$key]);
      }

      return $str;
    }

    // An empty string is already stable.
    if ($str === '') {
      return $str;
    }

    do {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous));
    } while ($previous !== $str);

    return $str;
  }
2595
2596
  /**
   * Get the writing direction of a specific character.
   *
   * When "IntlChar" support is flagged, its direction code is mapped first; if that
   * gives no answer (or support is missing) the code point is looked up in the
   * hard-coded right-to-left ranges below.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      if (in_array($direction, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($direction, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // Nothing outside of 0x5be..0x10b7f is listed as right-to-left.
    if (0x5be > $c || 0x10b7f < $c) {
      return 'LTR';
    }

    // Table entries: a plain integer is one code point (compared strictly),
    // a two-element array is an inclusive [first, last] range.
    if (0x85e >= $c) {
      $rtlTable = array(
          0x5be,
          0x5c0,
          0x5c3,
          0x5c6,
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          0x608,
          0x60b,
          0x60d,
          0x61b,
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          0x710,
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          0x7b1,
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          0x7fa,
          array(0x800, 0x815),
          0x81a,
          0x824,
          0x828,
          array(0x830, 0x83e),
          array(0x840, 0x858),
          0x85e,
      );
    } elseif (0x200f === $c) {
      return 'RTL';
    } elseif (0xfb1d <= $c) {
      $rtlTable = array(
          0xfb1d,
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          0xfb3e,
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          0x10808,
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          0x1083c,
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          0x1093f,
          0x10a00,
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );
    } else {
      $rtlTable = array();
    }

    foreach ($rtlTable as $entry) {
      if (is_array($entry)) {
        if ($entry[0] <= $c && $entry[1] >= $c) {
          return 'RTL';
        }
      } elseif ($entry === $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2714 1
2715 1
  /**
   * get data from "/data/*.php"
   *
   * The file is looked up next to this class and its "require" result is returned.
   *
   * @param string $file File name without directory and without the ".php" extension.
   *
   * @return bool|string|array|int false on error
   */
  protected static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2732
2733
  /**
   * Creates a random string of UTF-8 characters.
   *
   * WARNING: This method does not create a hash of something, maybe it will be renamed in future.
   *
   * The characters are picked with "mt_rand()", so the result is not suitable for
   * anything security related.
   *
   * @param    int $len The length of string in characters.
   *
   * @return   string String consisting of random characters, '' if $len is not positive.
   *
   * @deprecated
   */
  public static function hash($len = 8)
  {
    // Character pool, built on the first call and reused afterwards.
    static $chars = array();
    static $chars_len = null;

    // The loop below counts down to exactly 0, so a non-integer length
    // (e.g. 2.5) would never terminate; force an integer first.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (!$chars) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // Blank out everything that is neither a number nor a letter ...
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // ... and drop the entries that became empty.
        $chars = array_values(array_filter($chars));
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for (; $len; --$len) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
2784
2785
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with
   * "\u" or "U+" (case-insensitive).
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits may match: with a wider class, input like "12gh" would
    // pass the check and intval() would silently convert just the leading "12".
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2802 15
2803 3
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * All arguments are handed on unchanged.
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2816 15
2817 15
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * Uses "mb_encode_numericentity()" when it is available, otherwise every
   * character is encoded on its own via "UTF8::single_chr_html_encode()".
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param  string $str            The Unicode string to be encoded as numbered entities.
   * @param  bool   $keepAsciiChars Keep ASCII chars.
   * @param  string $encoding
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // Start above the ASCII range when ASCII chars should stay readable.
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      $encoding = self::normalizeEncoding($encoding);

      // convmap = (first code point, last code point, offset, mask).
      // The range runs up to U+10FFFF so that characters outside of the
      // Basic Multilingual Plane are converted as well.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff,),
          $encoding
      );
    }

    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return self::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
2856
2857
  /**
2858
   * UTF-8 version of html_entity_decode()
2859
   *
2860
   * The reason we are not using html_entity_decode() by itself is because
2861
   * while it is not technically correct to leave out the semicolon
2862
   * at the end of an entity most browsers will still interpret the entity
2863
   * correctly. html_entity_decode() does not convert entities without
2864
   * semicolons, so we are left with our own little solution here. Bummer.
2865
   *
2866
   * Convert all HTML entities to their applicable characters
2867
   *
2868
   * INFO: opposite to UTF8::html_encode()
2869
   *
2870
   * @link http://php.net/manual/en/function.html-entity-decode.php
2871
   *
2872
   * @param string $str      <p>
2873
   *                         The input string.
2874
   *                         </p>
2875
   * @param int    $flags    [optional] <p>
2876
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2877
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2878
   *                         <table>
2879
   *                         Available <i>flags</i> constants
2880
   *                         <tr valign="top">
2881
   *                         <td>Constant Name</td>
2882
   *                         <td>Description</td>
2883
   *                         </tr>
2884
   *                         <tr valign="top">
2885
   *                         <td><b>ENT_COMPAT</b></td>
2886
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2887
   *                         </tr>
2888
   *                         <tr valign="top">
2889
   *                         <td><b>ENT_QUOTES</b></td>
2890
   *                         <td>Will convert both double and single quotes.</td>
2891
   *                         </tr>
2892
   *                         <tr valign="top">
2893
   *                         <td><b>ENT_NOQUOTES</b></td>
2894
   *                         <td>Will leave both double and single quotes unconverted.</td>
2895
   *                         </tr>
2896
   *                         <tr valign="top">
2897
   *                         <td><b>ENT_HTML401</b></td>
2898
   *                         <td>
2899
   *                         Handle code as HTML 4.01.
2900
   *                         </td>
2901
   *                         </tr>
2902
   *                         <tr valign="top">
2903
   *                         <td><b>ENT_XML1</b></td>
2904
   *                         <td>
2905
   *                         Handle code as XML 1.
2906
   *                         </td>
2907
   *                         </tr>
2908
   *                         <tr valign="top">
2909
   *                         <td><b>ENT_XHTML</b></td>
2910
   *                         <td>
2911
   *                         Handle code as XHTML.
2912
   *                         </td>
2913
   *                         </tr>
2914
   *                         <tr valign="top">
2915
   *                         <td><b>ENT_HTML5</b></td>
2916
   *                         <td>
2917
   *                         Handle code as HTML 5.
2918
   *                         </td>
2919
   *                         </tr>
2920
   *                         </table>
2921
   *                         </p>
2922
   * @param string $encoding [optional] <p>
2923
   *                         Encoding to use.
2924
   *                         </p>
2925
   *
2926
   * @return string the decoded string.
2927
   */
2928
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Nothing to decode: empty input or no entity start at all.
    if ($str === '' || strpos($str, '&') === false) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);

    if ($flags === null) {
      // ENT_HTML5 is only referenced when the version check passes.
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Repeat until a pass leaves the string unchanged (handles nested encodings).
    do {
      $beforePass = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", array('\voku\helper\UTF8', 'html_entity_decode_callback'), $str);

      // decode numeric & UTF16 two byte entities
      // (the missing semicolon is appended first, so that the native function accepts them)
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($beforePass !== $str);

    return $str;
  }
2966
2967
  /**
2968
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2969
   *
2970
   * @link http://php.net/manual/en/function.htmlentities.php
2971
   *
2972
   * @param string $str           <p>
2973
   *                              The input string.
2974
   *                              </p>
2975
   * @param int    $flags         [optional] <p>
2976
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2977
   *                              invalid code unit sequences and the used document type. The default is
2978
   *                              ENT_COMPAT | ENT_HTML401.
2979
   *                              <table>
2980
   *                              Available <i>flags</i> constants
2981
   *                              <tr valign="top">
2982
   *                              <td>Constant Name</td>
2983
   *                              <td>Description</td>
2984
   *                              </tr>
2985
   *                              <tr valign="top">
2986
   *                              <td><b>ENT_COMPAT</b></td>
2987
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2988
   *                              </tr>
2989
   *                              <tr valign="top">
2990
   *                              <td><b>ENT_QUOTES</b></td>
2991
   *                              <td>Will convert both double and single quotes.</td>
2992
   *                              </tr>
2993
   *                              <tr valign="top">
2994
   *                              <td><b>ENT_NOQUOTES</b></td>
2995
   *                              <td>Will leave both double and single quotes unconverted.</td>
2996
   *                              </tr>
2997
   *                              <tr valign="top">
2998
   *                              <td><b>ENT_IGNORE</b></td>
2999
   *                              <td>
3000
   *                              Silently discard invalid code unit sequences instead of returning
3001
   *                              an empty string. Using this flag is discouraged as it
3002
   *                              may have security implications.
3003
   *                              </td>
3004
   *                              </tr>
3005
   *                              <tr valign="top">
3006
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3007
   *                              <td>
3008
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3009
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
3010
   *                              </td>
3011
   *                              </tr>
3012
   *                              <tr valign="top">
3013
   *                              <td><b>ENT_DISALLOWED</b></td>
3014
   *                              <td>
3015
   *                              Replace invalid code points for the given document type with a
3016
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3017
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3018
   *                              instance, to ensure the well-formedness of XML documents with
3019
   *                              embedded external content.
3020
   *                              </td>
3021
   *                              </tr>
3022
   *                              <tr valign="top">
3023
   *                              <td><b>ENT_HTML401</b></td>
3024
   *                              <td>
3025
   *                              Handle code as HTML 4.01.
3026
   *                              </td>
3027
   *                              </tr>
3028
   *                              <tr valign="top">
3029
   *                              <td><b>ENT_XML1</b></td>
3030
   *                              <td>
3031
   *                              Handle code as XML 1.
3032
   *                              </td>
3033
   *                              </tr>
3034
   *                              <tr valign="top">
3035
   *                              <td><b>ENT_XHTML</b></td>
3036
   *                              <td>
3037
   *                              Handle code as XHTML.
3038
   *                              </td>
3039
   *                              </tr>
3040
   *                              <tr valign="top">
3041
   *                              <td><b>ENT_HTML5</b></td>
3042
   *                              <td>
3043
   *                              Handle code as HTML 5.
3044
   *                              </td>
3045
   *                              </tr>
3046
   *                              </table>
3047
   *                              </p>
3048
   * @param string $encoding      [optional] <p>
3049
   *                              Like <b>htmlspecialchars</b>,
3050
   *                              <b>htmlentities</b> takes an optional third argument
3051
   *                              <i>encoding</i> which defines encoding used in
3052
   *                              conversion.
3053
   *                              Although this argument is technically optional, you are highly
3054
   *                              encouraged to specify the correct value for your code.
3055
   *                              </p>
3056
   * @param bool   $double_encode [optional] <p>
3057
   *                              When <i>double_encode</i> is turned off PHP will not
3058
   *                              encode existing html entities. The default is to convert everything.
3059
   *                              </p>
3060
   *
3061
   *
3062 1
   * @return string The encoded string. If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i>, an empty string will be returned,
   *                unless either the <b>ENT_IGNORE</b> or <b>ENT_SUBSTITUTE</b> flags are set.
3069
   */
3070
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $encoding = self::normalizeEncoding($encoding);

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // The extra pass below is only done for UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct char of three or more bytes together with its numbered entity.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $position);

      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
3096
3097
  /**
3098
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3099
   *
3100
   * INFO: Take a look at "UTF8::htmlentities()"
3101
   *
3102
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3103 1
   *
3104
   * @param string $str           <p>
3105 1
   *                              The string being converted.
3106
   *                              </p>
3107
   * @param int    $flags         [optional] <p>
3108
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3109
   *                              invalid code unit sequences and the used document type. The default is
3110
   *                              ENT_COMPAT | ENT_HTML401.
3111
   *                              <table>
3112
   *                              Available <i>flags</i> constants
3113
   *                              <tr valign="top">
3114
   *                              <td>Constant Name</td>
3115 1
   *                              <td>Description</td>
3116
   *                              </tr>
3117 1
   *                              <tr valign="top">
3118
   *                              <td><b>ENT_COMPAT</b></td>
3119
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3120
   *                              </tr>
3121
   *                              <tr valign="top">
3122
   *                              <td><b>ENT_QUOTES</b></td>
3123
   *                              <td>Will convert both double and single quotes.</td>
3124
   *                              </tr>
3125
   *                              <tr valign="top">
3126
   *                              <td><b>ENT_NOQUOTES</b></td>
3127 1
   *                              <td>Will leave both double and single quotes unconverted.</td>
3128
   *                              </tr>
3129 1
   *                              <tr valign="top">
3130
   *                              <td><b>ENT_IGNORE</b></td>
3131
   *                              <td>
3132
   *                              Silently discard invalid code unit sequences instead of returning
3133
   *                              an empty string. Using this flag is discouraged as it
3134
   *                              may have security implications.
3135
   *                              </td>
3136
   *                              </tr>
3137
   *                              <tr valign="top">
3138
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3139
   *                              <td>
3140
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3141
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
3142
   *                              </td>
3143
   *                              </tr>
3144
   *                              <tr valign="top">
3145
   *                              <td><b>ENT_DISALLOWED</b></td>
3146
   *                              <td>
3147
   *                              Replace invalid code points for the given document type with a
3148
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3149
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3150
   *                              instance, to ensure the well-formedness of XML documents with
3151
   *                              embedded external content.
3152
   *                              </td>
3153
   *                              </tr>
3154
   *                              <tr valign="top">
3155
   *                              <td><b>ENT_HTML401</b></td>
3156
   *                              <td>
3157
   *                              Handle code as HTML 4.01.
3158
   *                              </td>
3159
   *                              </tr>
3160
   *                              <tr valign="top">
3161
   *                              <td><b>ENT_XML1</b></td>
3162
   *                              <td>
3163
   *                              Handle code as XML 1.
3164
   *                              </td>
3165
   *                              </tr>
3166
   *                              <tr valign="top">
3167
   *                              <td><b>ENT_XHTML</b></td>
3168
   *                              <td>
3169
   *                              Handle code as XHTML.
3170
   *                              </td>
3171
   *                              </tr>
3172
   *                              <tr valign="top">
3173
   *                              <td><b>ENT_HTML5</b></td>
3174
   *                              <td>
3175
   *                              Handle code as HTML 5.
3176
   *                              </td>
3177
   *                              </tr>
3178
   *                              </table>
3179 16
   *                              </p>
3180
   * @param string $encoding      [optional] <p>
3181 16
   *                              Defines encoding used in conversion.
3182
   *                              </p>
3183
   *                              <p>
3184
   *                              For the purposes of this function, the encodings
3185
   *                              ISO-8859-1, ISO-8859-15,
3186
   *                              UTF-8, cp866,
3187
   *                              cp1251, cp1252, and
3188
   *                              KOI8-R are effectively equivalent, provided the
3189
   *                              <i>string</i> itself is valid for the encoding, as
3190
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3191
   *                              the same positions in all of these encodings.
3192 4
   *                              </p>
3193
   * @param bool   $double_encode [optional] <p>
3194 4
   *                              When <i>double_encode</i> is turned off PHP will not
3195
   *                              encode existing html entities, the default is to convert everything.
3196
   *                              </p>
3197
   *
3198
   * @return string The converted string.
3199
   * </p>
3200
   * <p>
3201
   * If the input <i>string</i> contains an invalid code unit
3202
   * sequence within the given <i>encoding</i> an empty string
3203
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3204 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
3205
   */
3206 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Run the encoding label through normalizeEncoding() first, then defer
    // entirely to the native function.
    $charset = self::normalizeEncoding($encoding);
    $converted = htmlspecialchars($str, $flags, $charset, $double_encode);

    return $converted;
  }
3212 1
3213 1
  /**
3214
   * checks whether iconv is available on the server
3215 1
   *
3216
   * @return   bool True if available, False otherwise
3217
   */
3218
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean; the cast only makes that explicit.
    return (bool)extension_loaded('iconv');
  }
3222
3223
  /**
3224
   * Converts Integer to hexadecimal U+xxxx code point representation.
3225
   *
3226 4
   * INFO: opposite to UTF8::hex_to_int()
3227
   *
3228
   * @param    int    $int The integer to be converted to hexadecimal code point.
3229 4
   * @param    string $pfix
3230
   *
3231
   * @return   string The code point, or empty string on failure.
3232 4
   */
3233
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain run of decimal digits is rejected.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    $digits = dechex((int)$int);

    // Left-pad with zeros up to the conventional four hex digits;
    // longer values are kept as they are.
    while (strlen($digits) < 4) {
      $digits = '0' . $digits;
    }

    return $pfix . $digits;
  }
3245
3246
  /**
3247
   * checks whether intl is available on the server
3248
   *
3249
   * @return   bool True if available, False otherwise
3250
   */
3251
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean; the cast only makes that explicit.
    return (bool)extension_loaded('intl');
  }
3255
3256
  /**
3257
   * checks whether intl-char is available on the server
3258
   *
3259
   * @return   bool True if available, False otherwise
3260
   */
3261
  public static function intlChar_loaded()
  {
    // Both the version check and the class lookup have to succeed.
    // "&&" is used instead of the low-precedence "and" so the expression keeps
    // its meaning if it is ever moved into an assignment.
    return Bootup::is_php('7.0') === true && class_exists('IntlChar');
  }
3265
3266
  /**
3267
   * alias for "UTF8::is_ascii()"
3268
   *
3269
   * @param string $str
3270
   *
3271
   * @return boolean
3272
   */
3273 2
  public static function isAscii($str)
  {
    // Pure alias: the argument is forwarded unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
3277
3278
  /**
3279
   * alias for "UTF8::is_base64()"
3280
   *
3281
   * @param string $str
3282
   *
3283
   * @return bool
3284
   */
3285 2
  public static function isBase64($str)
  {
    // Pure alias: the argument is forwarded unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
3289
3290 2
  /**
3291 2
   * alias for "UTF8::is_binary()"
3292 2
   *
3293 2
   * @param string $str
3294 2
   *
3295 2
   * @return bool
3296 2
   */
3297 2
  public static function isBinary($str)
  {
    // Pure alias: the argument is forwarded unchanged to is_binary().
    $result = self::is_binary($str);

    return $result;
  }
3301 2
3302 2
  /**
3303 2
   * alias for "UTF8::is_bom()"
3304
   *
3305 2
   * @param string $utf8_chr
3306 2
   *
3307 2
   * @return boolean
3308 2
   */
3309 2
  public static function isBom($utf8_chr)
  {
    // Pure alias: the argument is forwarded unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
3313 2
3314 1
  /**
3315 1
   * alias for "UTF8::is_json()"
3316 2
   *
3317 2
   * @param string $str
3318 2
   *
3319
   * @return bool
3320 2
   */
3321 1
  public static function isJson($str)
  {
    // Pure alias: the argument is forwarded unchanged to is_json().
    $result = self::is_json($str);

    return $result;
  }
3325
3326
  /**
3327
   * alias for "UTF8::is_html()"
3328 2
   *
3329
   * @param string $str
3330 2
   *
3331
   * @return boolean
3332
   */
3333
  public static function isHtml($str)
  {
    // Pure alias: the argument is forwarded unchanged to is_html().
    $result = self::is_html($str);

    return $result;
  }
3337
3338
  /**
3339
   * alias for "UTF8::is_utf8()"
3340 2
   *
3341
   * @param string $str
3342 2
   *
3343 2
   * @return bool
3344
   */
3345 2
  public static function isUtf8($str)
  {
    // Pure alias: the argument is forwarded unchanged to is_utf8().
    $result = self::is_utf8($str);

    return $result;
  }
3349 2
3350 2
  /**
3351 2
   * alias for "UTF8::is_utf16()"
3352 2
   *
3353 2
   * @param string $str
3354
   *
3355
   * @return bool
3356 2
   */
3357 2
  public static function isUtf16($str)
  {
    // Pure alias: the argument is forwarded unchanged to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
3361 2
3362 2
  /**
3363 1
   * alias for "UTF8::is_utf32()"
3364 1
   *
3365 1
   * @param string $str
3366 1
   *
3367 1
   * @return bool
3368 1
   */
3369
  public static function isUtf32($str)
  {
    // Pure alias: the argument is forwarded unchanged to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
3373 1
3374
  /**
3375 2
   * Checks if a string is 7 bit ASCII.
3376
   *
3377
   * @param    string $str The string to check.
3378
   *
3379
   * @return   bool <strong>true</strong> if it is ASCII<br />
3380
   *                <strong>false</strong> otherwise
3381
   */
3382
  public static function is_ascii($str)
  {
    // Look for any byte with the high bit set (0x80-0xFF).
    $highBitHits = preg_match('/[\x80-\xFF]/', $str);

    // 0 (no hit) and false (PCRE error) both negate to true, 1 negates to false.
    return !$highBitHits;
  }
3386
3387
  /**
3388
   * Returns true if the string is base64 encoded, false otherwise.
3389
   *
3390
   * @param string $str
3391
   *
3392
   * @return bool Whether or not $str is base64 encoded
3393
   */
3394
  public static function is_base64($str)
  {
    $candidate = (string)$str;

    // The empty string is never reported as base64.
    if ($candidate === '') {
      return false;
    }

    // Strict-decode and re-encode; only a lossless round trip counts.
    $roundTrip = base64_encode(base64_decode($candidate, true));

    return $roundTrip === $candidate;
  }
3408
3409
  /**
3410
   * Check if the input is binary... (this looks like a hack).
3411
   *
3412
   * @param string $input
3413
   *
3414
   * @return bool
3415 32
   */
3416
  public static function is_binary($input)
  {
    // A non-empty run made only of the digits "0" and "1" is treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Otherwise a single NUL byte is enough to classify the input as binary.
    //
    // NOTE(review): the previous implementation had a third condition,
    // substr_count($input, '^ -~') / strlen($input) > 0.3. It counted the
    // literal four-byte substring "^ -~", so the ratio could never exceed 0.25
    // and the branch was unreachable. It was presumably meant as a
    // "share of non-printable bytes" test; it is removed here rather than
    // "repaired", because a working version would change the result for
    // multi-byte text -- confirm the intended heuristic before adding one.
    return substr_count($input, "\x00") > 0;
  }
3433 28
3434 28
  /**
3435 28
   * Check if the file is binary.
3436 30
   *
3437
   * @param string $file
3438 13
   *
3439 13
   * @return boolean
3440 13
   */
3441 13
  public static function is_binary_file($file)
  {
    // Only the first 512 bytes are inspected. An unreadable file is treated
    // like empty content.
    $block = '';
    $fp = false;

    try {
      // 'rb': read the bytes untranslated on every platform.
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing, so the
      // handle has to be checked before it is used.
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        fclose($fp);
        $fp = false;

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Reached only if an error handler turns warnings into exceptions:
      // release the handle if it is still open and fall back to empty content.
      if ($fp !== false) {
        fclose($fp);
      }
      $block = '';
    }

    return self::is_binary($block);
  }
3453
3454
  /**
3455
   * Checks if the given string is a "Byte Order Mark".
3456
   *
3457 3
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3458 3
   *
3459 3
   * @param    string $str The input string.
3460 3
   *
3461 7
   * @return   bool True if $str is a Byte Order Mark, False otherwise.
3462
   */
3463 3
  public static function is_bom($str)
  {
    $found = false;

    // self::$bom is keyed by the BOM byte string; the input must be strictly
    // identical to one of those keys.
    foreach (self::$bom as $marker => $unusedLength) {
      if ($marker === $str) {
        $found = true;
        break;
      }
    }

    return $found;
  }
3473 32
3474
  /**
3475
   * Try to check if "$str" is a JSON string.
3476 30
   *
3477
   * @param string $str
3478 28
   *
3479 28
   * @return bool
3480 28
   */
3481 28
  public static function is_json($str)
  {
    $str = (string)$str;

    // The empty string is not a JSON document.
    if (!isset($str[0])) {
      return false;
    }

    $decoded = json_decode($str);

    // Accept documents whose top level is an object OR an array; previously a
    // valid array such as "[1,2]" was rejected. Bare scalars ("123", "\"x\"",
    // "true") are still reported as non-JSON, as before.
    return (is_object($decoded) || is_array($decoded))
           &&
           json_last_error() === JSON_ERROR_NONE;
  }
3499 28
3500 28
  /**
3501 5
   * Check if string contains any html-tags <lall>.
3502
   *
3503
   * @param string $str
3504 28
   *
3505 28
   * @return boolean
3506 28
   */
3507 28
  public static function is_html($str)
  {
    $subject = (string)$str;

    if ($subject === '') {
      return false;
    }

    // One opening or closing tag (optionally with attributes, optionally
    // self-closing) anywhere in the string is enough. A PCRE failure counts
    // as "no tag found".
    $hit = preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $subject);

    return $hit === 1;
  }
3526
3527
  /**
3528
   * Check if the string is UTF-16.
3529
   *
3530
   * @param string $str
3531
   *
3532
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3533
   */
3534 View Code Duplication
  public static function is_utf16($str)
  {
    // Only input that is_binary() flags is examined at all.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // Score each byte order, little-endian first, in the same sequence of
    // calls for both.
    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      // Convert back and forth once more; the candidate only scores if the
      // second decode reproduces the first one exactly.
      $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // One point for every key of count_chars($roundTrip) that is strictly
      // contained in the values of count_chars($str).
      $strChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $roundTripChar => $unused) {
        if (in_array($roundTripChar, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-16".
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
  }
3581
3582
  /**
3583
   * Check if the string is UTF-32.
3584
   *
3585
   * @param string $str
3586
   *
3587
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3588
   */
3589 View Code Duplication
  public static function is_utf32($str)
  {
    // Only input that is_binary() flags is examined at all.
    if (!self::is_binary($str)) {
      return false;
    }

    self::checkForSupport();

    // Score each byte order, little-endian first, in the same sequence of
    // calls for both.
    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if ($decoded === false || strlen($decoded) <= 1) {
        continue;
      }

      // Convert back and forth once more; the candidate only scores if the
      // second decode reproduces the first one exactly.
      $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // One point for every key of count_chars($roundTrip) that is strictly
      // contained in the values of count_chars($str).
      $strChars = self::count_chars($str);
      foreach (self::count_chars($roundTrip) as $roundTripChar => $unused) {
        if (in_array($roundTripChar, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-32".
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
  }
3636
3637
  /**
3638
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3639
   *
3640
   * @see    http://hsivonen.iki.fi/php-utf8/
3641
   *
3642 24
   * @param    string $str The string to be checked.
3643
   *
3644 24
   * @return   bool
3645
   */
3646 24
  public static function is_utf8($str)
  {
    $str = (string)$str;

    // The empty string is reported as valid.
    if (!isset($str[0])) {
      return true;
    }

    // NOTE(review): this shortcut runs when pcre_utf8_support() is NOT true,
    // although it relies on the /u modifier -- the condition looks inverted.
    // pcre_utf8_support() is defined elsewhere; confirm before changing it.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);

    }

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0;  // code point accumulated so far
    $mBytes = 1; // total number of octets announced by the current lead octet
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or the
        // lead octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Current octet is neither in the US-ASCII range nor a legal first
          // octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // mState is non-zero: a continuation octet (10xxxxxx) is required.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation: merge its six payload bits into the code point.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      // End of the multi-octet sequence: mUcs4 now holds the final code point.
      if (0 === --$mState) {
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset the decoder state for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A string that ends in the middle of a multi-octet sequence (for example
    // a lone lead octet as the last byte) is not valid UTF-8; previously this
    // case fell through and was reported as valid.
    return $mState === 0;
  }
3770 1
3771 1
  /**
3772 1
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3773
   * Decodes a JSON string
3774 2
   *
3775
   * @link http://php.net/manual/en/function.json-decode.php
3776
   *
3777
   * @param string $json    <p>
3778
   *                        The <i>json</i> string being decoded.
3779
   *                        </p>
3780
   *                        <p>
3781
   *                        This function only works with UTF-8 encoded strings.
3782
   *                        </p>
3783
   *                        <p>PHP implements a superset of
3784
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3785
   *                        only supports these values when they are nested inside an array or an object.
3786 8
   *                        </p>
3787
   * @param bool   $assoc   [optional] <p>
3788 8
   *                        When <b>TRUE</b>, returned objects will be converted into
3789 8
   *                        associative arrays.
3790
   *                        </p>
3791 8
   * @param int    $depth   [optional] <p>
3792
   *                        User specified recursion depth.
3793 8
   *                        </p>
3794
   * @param int    $options [optional] <p>
3795 2
   *                        Bitmask of JSON decode options. Currently only
3796
   *                        <b>JSON_BIGINT_AS_STRING</b>
3797 2
   *                        is supported (default is to cast large integers as floats)
3798
   *                        </p>
3799 1
   *
3800 1
   * @return mixed the value encoded in <i>json</i> in appropriate
3801
   * PHP type. Values true, false and
3802 2
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3803 2
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3804
   * <i>json</i> cannot be decoded or if the encoded
3805 8
   * data is deeper than the recursion limit.
3806 8
   */
3807 1
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = self::filter($json);

    // The $options argument is only forwarded when Bootup::is_php('5.4')
    // reports exactly true.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3819
3820
  /**
3821
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3822
   * Returns the JSON representation of a value
3823
   *
3824
   * @link http://php.net/manual/en/function.json-encode.php
3825
   *
3826 1
   * @param mixed $value   <p>
3827
   *                       The <i>value</i> being encoded. Can be any type except
3828 1
   *                       a resource.
3829 1
   *                       </p>
3830
   *                       <p>
3831
   *                       All string data must be UTF-8 encoded.
3832
   *                       </p>
3833
   *                       <p>PHP implements a superset of
3834
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3835
   *                       only supports these values when they are nested inside an array or an object.
3836
   *                       </p>
3837
   * @param int   $options [optional] <p>
3838
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3839
   *                       <b>JSON_HEX_TAG</b>,
3840
   *                       <b>JSON_HEX_AMP</b>,
3841
   *                       <b>JSON_HEX_APOS</b>,
3842 1
   *                       <b>JSON_NUMERIC_CHECK</b>,
3843
   *                       <b>JSON_PRETTY_PRINT</b>,
3844 1
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3845
   *                       <b>JSON_FORCE_OBJECT</b>,
3846
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3847
   *                       constants is described on
3848
   *                       the JSON constants page.
3849
   *                       </p>
3850
   * @param int   $depth   [optional] <p>
3851
   *                       Set the maximum depth. Must be greater than zero.
3852
   *                       </p>
3853
   *
3854
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3855 15
   */
3856
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // The $depth argument is only forwarded when Bootup::is_php('5.5') is truthy.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3868 13
3869 7
  /**
3870
   * Makes string's first char lowercase.
3871
   *
3872 12
   * @param    string $str The input string
3873 8
   *
3874
   * @return   string The resulting string
3875
   */
3876 10
  public static function lcfirst($str)
  {
    // Lower-case the first character only, then glue the untouched rest back on.
    $head = self::substr($str, 0, 1);
    $loweredHead = self::strtolower($head);
    $tail = self::substr($str, 1);

    return $loweredHead . $tail;
  }
3880
3881
  /**
   * Strip whitespace (or the given characters) from the beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than the native "ltrim()"!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted, "\s" is used
   *
   * @return   string The string with the unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // INF is the "no custom character list" marker -> fall back to the whitespace class
    if (INF === $chars) {
      $pattern = '\s';
    } else {
      $pattern = self::rxClass($chars);
    }

    return preg_replace('/^' . $pattern . '+/u', '', $str);
  }
3903
3904 1
  /**
   * Return the UTF-8 character with the highest code point found in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *                      (an array is concatenated before it is inspected).
   *
   * @return   string The character with the highest code point.
   */
  public static function max($arg)
  {
    $str = is_array($arg) ? implode($arg) : $arg;

    $highest = max(self::codepoints($str));

    return self::chr($highest);
  }
3919
3920
  /**
   * Calculate the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest per-character byte length reported by chr_size_list(),
   *               or 0 when that list is empty.
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3937 1
3938
  /**
   * Check whether the "mbstring" extension is available on the server.
   *
   * Side effect: when the extension is loaded, its internal encoding is switched to UTF-8.
   *
   * @return   bool True if available, false otherwise
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3953
3954
  /**
   * Return the UTF-8 character with the lowest code point found in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings
   *                      (an array is concatenated before it is inspected).
   *
   * @return   string The character with the lowest code point.
   */
  public static function min($arg)
  {
    $str = is_array($arg) ? implode($arg) : $arg;

    $lowest = min(self::codepoints($str));

    return self::chr($lowest);
  }
3969
3970
  /**
   * Normalize an encoding name.
   *
   * The name is upper-cased and, if its alphanumeric form is a known alias,
   * replaced by the canonical name. Results are cached per input value.
   *
   * @param string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.
   */
  public static function normalizeEncoding($encoding)
  {
    static $staticNormalizeEncodingCache = array();

    // returned as-is: empty input, the canonical "UTF-8" or a name listed in self::$iconvEncoding
    if (
        !$encoding
        ||
        'UTF-8' === $encoding
        ||
        in_array($encoding, self::$iconvEncoding, true)
    ) {
      return $encoding;
    }

    if (isset($staticNormalizeEncodingCache[$encoding])) {
      return $staticNormalizeEncodingCache[$encoding];
    }

    $upper = strtoupper($encoding);

    // alias lookup ignores punctuation, e.g. "ISO-8859-1" -> "ISO88591"
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // unknown names keep their upper-cased form
    $normalized = empty($aliases[$aliasKey]) ? $upper : $aliases[$aliasKey];

    $staticNormalizeEncodingCache[$encoding] = $normalized;

    return $normalized;
  }
4026 1
4027 1
  /**
   * Normalize MS Word special characters.
   *
   * Every key of self::$utf8MSWord found in the string is replaced by its mapped value.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists are derived from the mapping only once per request
    static $search = null;
    static $replacements = null;

    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replacements = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replacements, $str);
  }
4046
4047
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable found in the string is replaced by a plain
   * space; entries of self::$bidiUniCodeControlsTable are removed unless they are kept.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaces = array();
    static $bidiUniCodeControls = null;

    // The cache key must follow the same strict test that decides whether the
    // non-breaking space is dropped from the table. Otherwise a truthy non-bool
    // (e.g. 1) would cache the full table under the key that "true" reads later.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($whitespaces[$cacheKey])) {

      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaces[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($bidiUniCodeControls === null) {
        $bidiUniCodeControls = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiUniCodeControls, '', $str);
    }

    return str_replace($whitespaces[$cacheKey], ' ', $str);
  }
4085 36
4086 36
  /**
   * Format a number with grouped thousands.
   *
   * The native number_format() only honours multi-byte separators since PHP 5.4.
   * On older versions the number is formatted with "." / "," first and both
   * placeholders are then swapped for the requested separators.
   *
   * @param float  $number        The number being formatted.
   * @param int    $decimals      Number of decimal points.
   * @param string $dec_point     Separator for the decimal point.
   * @param string $thousands_sep Thousands separator.
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    if (
        (isset($thousands_sep[1]) || isset($dec_point[1]))
        &&
        Bootup::is_php('5.4') !== true
    ) {
      // strtr() replaces both placeholders in one pass, so a "," inside $dec_point
      // (or a "." inside $thousands_sep) is not substituted a second time
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
4121
4122
  /**
   * Calculate the Unicode code point of the given UTF-8 encoded character.
   *
   * @param    string $s The character of which to calculate the code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for empty input; when no multi-byte sequence can be decoded,
   *           the value of the first byte is returned.
   */
  public static function ord($s)
  {
    // "0" is falsy in PHP but still a valid character
    if (!$s && $s !== '0') {
      return 0;
    }

    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $codePoint = \IntlChar::ord($s);
      if ($codePoint) {
        return $codePoint;
      }
    }

    // manual decoding from (at most) the first four bytes; unpack() yields a 1-based array
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // 4-byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // 3-byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // 2-byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    return $lead;
  }
4163
4164
  /**
   * Parse a query string into variables.
   *
   * WARNING: Unlike the one-argument form of parse_str(), the parsed values are never
   *    placed in the local scope; they are always written into "$result".
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string.
   *                        </p>
   * @param array  $result  <p>
   *                        Receives the parsed variables as array elements.
   *                        </p>
   *
   * @return void
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    // the input is passed through self::filter() before it is parsed
    \mb_parse_str(self::filter($str), $result);
  }
4191
4192
  /**
   * Check whether the "u" pattern modifier (Unicode support in PCRE) is available.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier fails (silenced) when PCRE lacks UTF-8 support
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
4202
4203 25
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each bound is interpreted as a decimal code point when it consists of digits only,
   * as a hexadecimal code point when it consists of hex digits only, and as
   * a UTF-8 character otherwise.
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters of the range; an empty array if a bound is empty
   *                 or resolves to 0.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $bounds = array();

    foreach (array($var1, $var2) as $bound) {
      if (ctype_digit((string)$bound)) {
        $codePoint = (int)$bound;
      } elseif (ctype_xdigit($bound)) {
        $codePoint = (int)self::hex_to_int($bound);
      } else {
        $codePoint = self::ord($bound);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
4249
4250
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$bom (BOM bytes => byte length) is checked in turn;
   * a BOM found at the very beginning of the string is cut off.
   *
   * @param string $str
   *
   * @return string
   */
  public static function removeBOM($str = '')
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (strpos($str, $bomString) !== 0) {
        continue;
      }

      $str = substr($str, $bomByteLength);
    }

    return $str;
  }
4267
4268
  /**
   * Collapse directly repeated occurrences of a string into a single occurrence.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String (or list of strings) whose consecutive repetitions
   *                              are collapsed; any other type leaves "$str" untouched
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        // The replacement refers to the captured group instead of inserting "$item"
        // verbatim: an item such as "$0" or "\0" would otherwise be interpreted as
        // a back-reference to the whole match and nothing would be collapsed.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4290
4291 2
  /**
   * Remove invisible characters.
   *
   * This prevents sandwiching null characters
   * between ASCII characters, like Java\0script.
   *
   * Adapted from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * NOTE: the replacement runs repeatedly until nothing matches anymore, therefore
   * "$replacement" must not contain a character that is removed itself.
   *
   * @param  string $str
   * @param  bool   $url_encoded Also remove the percent-encoded form of the characters.
   * @param  string $replacement Text inserted in place of each removed sequence.
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of a percent-encoding may be written in either case -> "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until stable: removing one sequence can expose a new one (e.g. "%%000")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4325
4326
  /**
   * Replace the diamond question mark (�), i.e. the Unicode replacement character.
   *
   * @param string $str
   * @param string $unknown The text that is inserted instead of the character.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the character is listed once as byte sequence and once as literal
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );

    $replace = array(
        $unknown,
        $unknown,
    );

    return str_replace($search, $replace, $str);
  }
4348
4349
  /**
   * Strip whitespace (or the given characters) from the end of a UTF-8 string.
   *
   * WARNING: This is much slower than the native "rtrim()"!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted, "\s" is used
   *
   * @return   string The string with the unwanted characters stripped from the right
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // INF is the "no custom character list" marker -> fall back to the whitespace class
    if (INF === $chars) {
      $pattern = '\s';
    } else {
      $pattern = self::rxClass($chars);
    }

    return preg_replace('/' . $pattern . '+$/u', '', $str);
  }
4371
4372
  /**
   * Build a regex fragment that matches any of the characters of "$s".
   *
   * The result is a bracket expression ("[...]"); elements of the split string that
   * do not fit into it are appended as alternatives: "(?:[...]|x|y)".
   * Results are cached per "$s" / "$class" combination.
   *
   * @param string $s     The characters to match.
   * @param string $class Initial content of the bracket expression.
   *
   * @return string
   */
  protected static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    $bracket = $class;
    $alternatives = array();

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen goes to the front, where it cannot form a range
        $bracket = '-' . $bracket;
      } elseif (!isset($char[2])) {
        // at most two bytes long -> escaped for the "/" delimited pattern
        $bracket .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        // a single character of three or more bytes is added verbatim
        $bracket .= $char;
      } else {
        $alternatives[] = $char;
      }
    }

    $bracket = '[' . $bracket . ']';

    if (count($alternatives) === 0) {
      $return = $bracket;
    } else {
      $return = '(?:' . implode('|', array_merge(array($bracket), $alternatives)) . ')';
    }

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
4417
4418
  /**
   * Echo the entries of self::$support (native UTF8-Support libs), e.g. for debugging.
   *
   * Each value is printed on its own line, followed by "<br>".
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportValue) {
      echo $supportValue, "\n<br>";
    }
  }
4427
4428 2
  /**
   * Convert a UTF-8 character to an HTML Numbered Entity like "&#123;".
   *
   * @param    string $char           The Unicode character to be encoded as numbered entity.
   * @param    bool   $keepAsciiChars Set to true, to return ASCII chars unchanged.
   *
   * @return   string The HTML numbered entity, or an empty string for empty input.
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false)
  {
    // "0" is falsy in PHP but still a character that has to be encoded (same guard as in ord())
    if (!$char && $char !== '0') {
      return '';
    }

    if (
        $keepAsciiChars === true
        &&
        self::isAscii($char) === true
    ) {
      return $char;
    }

    return '&#' . self::ord($char) . ';';
  }
4452
4453
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Max character length of each array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *                              (only applied when PCRE UTF-8 support is used).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // init
    self::checkForSupport();
    $ret = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // with the "u" and "s" modifiers "." matches every single character, line breaks included
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and group each lead byte with its continuation bytes;
      // a lead byte without valid continuation bytes is dropped

      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        // single byte (ASCII)
        if (($lead & "\x80") === "\x00") {
          $ret[] = $lead;
          continue;
        }

        // two byte sequence
        if ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $ret[] = $lead . $str[$pos + 1];

            $pos++;
          }
          continue;
        }

        // three byte sequence
        if ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $ret[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
          continue;
        }

        // four byte sequence
        if ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $ret[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    // merge the single characters into chunks of "$length" characters
    if ($length > 1) {
      $ret = array_map('implode', array_chunk($ret, $length));
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4531
4532
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally an "iconv()" round-trip per known encoding.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      if (self::is_utf16($str) === 1) {
        return 'UTF-16LE';
      }

      if (self::is_utf16($str) === 2) {
        return 'UTF-16BE';
      }

      if (self::is_utf32($str) === 1) {
        return 'UTF-32LE';
      }

      if (self::is_utf32($str) === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    self::checkForSupport();

    $candidates = array(
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
        'UTF-8',
    );

    $detected = \mb_detect_encoding($str, $candidates, true);
    if ($detected) {
      return $detected;
    }

    //
    // 5.) check via "iconv()"
    //

    // an encoding is accepted when converting the string to itself leaves it unchanged
    $checksum = md5($str);
    foreach (self::$iconvEncoding as $candidate) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @iconv($candidate, $candidate, $str);

      if (md5($roundTrip) === $checksum) {
        return $candidate;
      }
    }

    return false;
  }
4609
4610
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value (or list of values) to search for. Every replacement
   *                       with a search array is performed on the result of the previous
   *                       replacement. An empty search value never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value (or list of values); it is inserted
   *                       literally, "$1" or "\1" are not treated as back-references.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed a string or an array of replacements.
   * @since 5.0
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // turn every needle into a case-insensitive, unicode-aware pattern
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // pattern that can never match: an empty needle replaces nothing
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // escape "\" and "$", so that preg_replace() inserts the replacement text verbatim
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
4654 8
4655
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * A string that is longer than "$length" is cut at the last space within the limit
   * and "$strAddOn" is appended; shorter strings are returned unchanged.
   *
   * @param  string $str
   * @param  int    $length   Maximum number of characters that are kept from "$str".
   * @param  string $strAddOn Text that is appended to a shortened string.
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls exactly behind a space: cut right before that space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // otherwise drop the (possibly incomplete) last word of the truncated string
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $kept = implode(' ', $words);

    if ($kept === '') {
      // nothing left after dropping the last word -> hard cut instead
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $kept . $strAddOn;
  }
4695 1
4696
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when "$pad_length" is not a positive integer,
   * is smaller than the length of the input, or when "$pad_string" is empty.
   *
   * @param    string $input      The input string
   * @param    int    $pad_length The length of return string
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($input, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $input_length = self::strlen($input);

    if (is_int($pad_length) && ($pad_length > 0) && ($pad_length >= $input_length)) {
      $ps_length = self::strlen($pad_string);

      // nothing to pad with; also avoids a division by zero below
      if (!$ps_length) {
        return $input;
      }

      $diff = $pad_length - $input_length;

      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // an odd difference puts the extra character on the right side;
          // the division has to happen before the cast, so that an integer length is passed
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $pre = self::substr($pre, 0, (int)floor($diff / 2));
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $post = self::substr($post, 0, (int)ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $input . $post;
    }

    return $input;
  }
4741
4742 7
  /**
   * Repeat a string.
   *
   * The input is run through self::filter() first; the filtered value is then
   * handed to PHP's native str_repeat().
   *
   * @param string $input      The string to be repeated.
   * @param int    $multiplier Number of times the input string should be repeated;
   *                           passed unchanged to the native str_repeat().
   *
   * @return string the repeated string.
   */
  public static function str_repeat($input, $multiplier)
  {
    return str_repeat(self::filter($input), $multiplier);
  }
4766
4767 1
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: thin wrapper around the native "str_replace()"; all arguments are
   * forwarded unchanged.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  The value (or array of values) being searched for.
   * @param mixed $replace The replacement value (or array of replacement values).
   * @param mixed $subject The string or array being searched and replaced on.
   * @param int   $count   [optional] If passed, receives the number of replacements performed.
   *
   * @return mixed A string or an array with the replaced values, as returned by str_replace().
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4801
4802
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split with self::split(), the resulting pieces are shuffled
   * and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $pieces = self::split($str);
    shuffle($pieces);

    return implode('', $pieces);
  }
4817
4818
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $flipped = array_flip($codePoints);
      $codePoints = array_flip($flipped);
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4843
4844
  /**
   * Convert a string to an array.
   *
   * The string is cut into grapheme clusters (via intl's grapheme_extract() when
   * available, otherwise via the polyfill's cluster regex); for $len > 1 every
   * $len consecutive clusters are joined into one array entry.
   *
   * @param string $str
   * @param int    $len Number of clusters per entry; values below 1 are delegated
   *                    to the native str_split().
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();

    $len = (int)$len;
    if ($len < 1) {
      return str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $byteLength = strlen($str);
      // grapheme_extract() writes the byte offset of the next cluster back into $pos
      for ($pos = 0; $pos < $byteLength;) {
        $graphemes[] = \grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $pos, $pos);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $graphemes = $matches[0];
    }

    if ($len === 1) {
      return $graphemes;
    }

    // group $len clusters at a time and glue each group back into one string
    return array_map('implode', array_chunk($graphemes, $len));
  }
4892
4893
  /**
   * Get a binary representation of a specific character.
   *
   * Every byte of the input is passed to self::ord() and the result is formatted
   * as a zero-padded 8-digit binary number; the pieces are concatenated.
   *
   * @param   string $str The input character.
   *
   * @return  string Empty string for empty input.
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $bits = '';
    $byteCount = strlen($str);
    $index = 0;

    while ($index < $byteCount) {
      $bits .= vsprintf('%08b', (array)self::ord($str[$index]));
      ++$index;
    }

    return $bits;
  }
4919
4920
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   *
   * The input is cleaned with self::clean(), split into single characters, and every
   * non-ASCII character is replaced through a lookup table. The tables ("banks") are
   * loaded on demand from the "data/xNN.php" files next to this class, one bank per
   * block of 256 code points, and cached for the rest of the request.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    // Bank cache, keyed by ($ord >> 8). The variable name matters: a required bank
    // file runs in this scope and is expected to fill $UTF8_TO_ASCII[$bank].
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = isset($ar[0]) ? $ar[0] : array();

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // reset per character, so a code point from a previous iteration is never reused
      $ord = null;

      // decode the code point from the lead byte and its continuation bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // stray continuation bytes (128-191) and the bytes 254/255 cannot start a character
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        // no bank file, or the file did not define this bank: remember it as empty
        if (!isset($UTF8_TO_ASCII[$bank])) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
5030
5031 2
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split on "words" (runs of the character class built by
   * self::rxClass(), optionally joined by dashes or apostrophes); the words end
   * up at the odd indexes of the split result.
   *
   * @param string $str    The input string.
   * @param int    $format <strong>0</strong> => return a number of words<br />
   *                       <strong>1</strong> => return an array of words
   *                       <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist
   *
   * @return array|float The number of words in the string
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $parts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $partCount = count($parts);

    if ($format === 1) {
      $words = array();
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[] = $parts[$index];
      }

      return $words;
    }

    if ($format === 2) {
      self::checkForSupport();

      $words = array();
      // character offset of the first word = length of the leading non-word part
      $position = self::strlen($parts[0]);
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[$position] = $parts[$index];
        $position += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
      }

      return $words;
    }

    return ($partCount - 1) / 2;
  }
5075
5076
  /**
   * Case-insensitive string comparison.
   *
   * Both operands are case-folded with self::strtocasefold() and then compared
   * with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5088
5089
  /**
   * String comparison.
   *
   * Identical strings short-circuit to 0; otherwise both operands are normalized
   * to NFD and compared with the native strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5106 11
5107
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str
   * @param string $charList Characters that end the segment; an empty list yields null.
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null Length (via self::strlen()) of the leading part of the
   *                  (optionally sliced) string that contains none of the listed characters.
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // only slice when the caller actually asked for a sub-range
    $str = ($offset || 2147483647 !== $length)
        ? (string)self::substr($str, $offset, $length)
        : (string)$str;

    $found = preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $match);

    /** @noinspection OffsetOperationsInspection */
    return self::strlen($found ? $match[1] : $str);
  }
5136
5137 10
  /**
   * Makes a UTF-8 string from code points.
   *
   * Each entry is converted with this class' chr() and the results are concatenated.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $converter = array('\\voku\\helper\\UTF8', 'chr');
    $characters = array_map($converter, $array);

    return implode('', $characters);
  }
5156
5157
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The known marks are the keys of self::$bom.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    $hasBom = false;

    foreach (self::$bom as $mark => $unusedByteLength) {
      if (strpos($str, $mark) === 0) {
        $hasBom = true;
        break;
      }
    }

    return $hasBom;
  }
5174
5175
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The input is first passed through self::clean() and then handed to the
   * native strip_tags().
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            The input string.
   * @param string $allowable_tags [optional] Tags which should not be stripped;
   *                               forwarded unchanged to the native strip_tags().
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8 before stripping
    return strip_tags(self::clean($str), $allowable_tags);
  }
5201
5202 4
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  The string from which to get the position of the first occurrence of needle.
   * @param string  $needle    The string to find in haystack.
   * @param int     $offset    [optional] The position in haystack to start searching;
   *                           null (the default) is treated as 0.
   * @param string  $encoding  Charset handed to mb_stripos(); a boolean is treated as 'UTF-8'
   *                           (fallback for old call signatures), anything else goes through
   *                           self::normalizeEncoding().
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack,
   *                   or false if needle is not found or either string is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // mb_stripos() expects an integer offset; passing the null default through is deprecated
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalizeEncoding($encoding);
    }

    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5251 1
5252
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
   *
   * The search is delegated to mb_stristr() with the 'UTF-8' encoding.
   *
   * @param string $str
   * @param string $needle        An empty needle yields false.
   * @param bool   $before_needle Forwarded unchanged to mb_stristr().
   *
   * @return false|string
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle .= '';
    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    $portion = \mb_stristr($str, $needle, $before_needle, 'UTF-8');

    return $portion;
  }
5272
5273
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Set the charset for e.g. "\mb_" function; a boolean is
   *                           treated as 'UTF-8' (fallback for old call signatures).
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied for 'UTF-8').
   *
   * @return int the number of characters in string str having character encoding
   *             encoding. A multi-byte character is counted as 1.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: this is only a fallback for old versions
    $encoding = ($encoding === true || $encoding === false)
        ? 'UTF-8'
        : self::normalizeEncoding($encoding);

    // for these encodings the plain byte count is returned
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    self::checkForSupport();

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5316
5317
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both operands are case-folded with self::strtocasefold() and then compared
   * with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>< 0</strong> if str1 is less than str2<br />
   *             <strong>> 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5331
5332
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Identical strings short-circuit to 0; otherwise both operands are passed
   * through self::strtonatfold() and compared with the native strnatcmp().
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 The first string.
   * @param string $str2 The second string.
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2,
   *             and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5355
5356
  /**
   * Case-insensitive string comparison of the first n characters
   *
   * Both operands are case-folded with self::strtocasefold() and then compared
   * with self::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 The first string.
   * @param string $str2 The second string.
   * @param int    $len  The length of strings to be used in the comparison.
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2,
   *             and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5381
5382
  /**
   * String comparison of the first n characters
   *
   * Both operands are cut to their first $len characters with self::substr()
   * and the pieces are compared with self::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 The first string.
   * @param string $str2 The second string.
   * @param int    $len  Number of characters to use in the comparison.
   *
   * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2,
   *             and 0 if they are equal.
   */
  public static function strncmp($str1, $str2, $len)
  {
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5408 10
5409
  /**
   * Search a string for any of a set of characters
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  The string where char_list is looked for.
   * @param string $char_list The characters to look for; this parameter is case sensitive.
   *
   * @return string|false The rest of haystack starting from the first character found,
   *                      or false if none is found or either argument is empty.
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m)) {
      return false;
    }

    // byte position of the matched character inside the haystack
    $bytePos = strpos($haystack, $m[0]);

    return substr($haystack, $bytePos);
  }
5440
5441
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string  $haystack     The string being checked.
   * @param string  $needle       The string to search for; it is always cast to string,
   *                              so an integer is searched for by its decimal digits.
   * @param int     $offset       [optional] The search offset. If it is not specified, 0 is used.
   * @param string  $encoding     Charset used by the mbstring code path; a boolean is treated
   *                              as 'UTF-8' (fallback for old call signatures).
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string.
   *
   * @return int|false The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found, or either string is empty, it returns false.
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    // The former "integer needle => chr()" conversion was removed here: $needle is
    // already a string at this point, so that branch could never run.

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalizeEncoding($encoding);
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): this branch is gated on the 'iconv' flag but calls a grapheme_*
    // function - confirm which support flag is intended.
    if (self::$support['iconv'] === true) {
      // a negative offset is ignored to keep compatibility with older PHP versions
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // last resort: byte-wise search, then translate the byte position into characters
    if ($offset > 0) {
      $haystack = (string)self::substr($haystack, $offset);
    }

    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    $left = substr($haystack, 0, $pos);

    // negative offset not supported in PHP strpos(), ignoring
    return ($offset > 0 ? $offset : 0) + self::strlen($left);
  }
5519
5520 1
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around mb_strrchr(); only the encoding name is normalized first.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle   The string to find in haystack.
   * @param bool   $part     [optional] Determines which portion of haystack is returned:
   *                         true  => from the beginning to the last occurrence of needle,
   *                         false => from the last occurrence of needle to the end.
   * @param string $encoding [optional] Character encoding name, passed through
   *                         self::normalizeEncoding().
   *
   * @return string|false the portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    return \mb_strrchr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5555
5556
  /**
   * Reverses the order of the characters in a string.
   *
   * The string is cut into pieces by self::split(), the pieces are reversed
   * and glued back together.
   *
   * @param string $str The input string
   *
   * @return string The string with its characters in reverse order
   */
  public static function strrev($str)
  {
    $characters = self::split($str);
    $reversed = array_reverse($characters);

    return implode('', $reversed);
  }
5567 37
5568
  /**
   * Returns a portion of a string relative to the last occurrence of a needle (case insensitive).
   *
   * Delegates to mb_strrichr() after normalizing the encoding name.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack The string to search in.
   * @param string $needle   The string to search for.
   * @param bool   $part     [optional] <p>
   *                         true: return everything from the beginning of haystack
   *                         up to the last occurrence of needle.<br/>
   *                         false: return everything from the last occurrence of needle
   *                         to the end of haystack.
   *                         </p>
   * @param string $encoding [optional] Character encoding name, passed through
   *                         self::normalizeEncoding() before use.
   *
   * @return string|false The requested portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    // init
    self::checkForSupport();

    $normalizedEncoding = self::normalizeEncoding($encoding);
    $portion = \mb_strrichr($haystack, $needle, $part, $normalizedEncoding);

    return $portion;
  }
5603
5604
  /**
   * Finds the position of the last occurrence of a string, ignoring case.
   *
   * Both arguments are lowercased with self::strtolower() and then handed to self::strrpos().
   *
   * @param string $haystack The string to look in
   * @param string $needle   The string to look for
   * @param int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return int|false The position of the last match, or false if self::strrpos() finds nothing
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $lowerHaystack = self::strtolower($haystack);
    $lowerNeedle = self::strtolower($needle);

    return self::strrpos($lowerHaystack, $lowerNeedle, $offset);
  }
5617
5618 1
  /**
   * Finds the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  The string being searched.
   * @param string|int $needle    The string to find in haystack,
   *                              or a non-negative code point as int (converted via self::chr()).
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number
   *                              of characters into the string. Negative values will stop searching
   *                              at an arbitrary point prior to the end of the string.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from haystack and needle before searching
   *
   * @return int|false The numeric position of the last occurrence of needle in haystack,
   *                   or false if needle is not found or one of the two strings is empty.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // a non-negative integer needle is treated as a code point
    if (is_int($needle) && $needle >= 0) {
      $needle = self::chr($needle);
    }
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    // NOTE(review): the "iconv" flag guards a grapheme_* call here - confirm this is intended
    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: cut the haystack according to the offset, then search byte-wise

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePosition = strrpos($haystack, $needle);
    if ($bytePosition === false) {
      return false;
    }

    // translate the byte position into a character position
    $prefix = substr($haystack, 0, $bytePosition);

    // negative offset not supported in PHP strpos(), ignoring
    return ($offset > 0 ? $offset : 0) + self::strlen($prefix);
  }
5693 1
5694 1
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters
   * contained within a given mask.
   *
   * If an offset or a non-default length is given, the string is first cut with self::substr().
   * The mask is turned into a regex character class by self::rxClass().
   *
   * @param string $str
   * @param string $mask
   * @param int    $offset
   * @param int    $length
   *
   * @return int|null Result of self::strlen() on the matched prefix, or 0 if nothing matches
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $mustSlice = $offset || 2147483647 !== $length;
    if ($mustSlice) {
      $str = self::substr($str, $offset, $length);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';

    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5713 1
5714
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Delegates to grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      The input string. Must be valid UTF-8.
   * @param string $needle        The string to look for. Must be valid UTF-8.
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack before the first
   *                              occurrence of the needle is returned (excluding the needle).
   *                              </p>
   *
   * @return string|false The portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    // init
    self::checkForSupport();

    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5738
5739
  /**
   * Unicode transformation for case-less matching.
   *
   * Steps, in order: replace via the self::$commonCaseFold table, optionally replace via the
   * "caseFolding_full" data set, clean the string, lowercase it.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str
   * @param bool   $full Also apply the "caseFolding_full" replacements
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    // lookup tables are built / loaded once and kept for later calls
    static $commonFrom = null;
    static $commonTo = null;
    static $fullTable = null;

    if (null === $commonFrom) {
      $commonFrom = array_keys(self::$commonCaseFold);
      $commonTo = array_values(self::$commonCaseFold);
    }

    $folded = str_replace($commonFrom, $commonTo, $str);

    if ($full) {
      if (null === $fullTable) {
        $fullTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search strings, index 1 the replacements
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullTable[0], $fullTable[1], $folded);
    }

    $cleaned = self::clean($folded);

    return self::strtolower($cleaned);
  }
5776
5777 6
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      The string being lowercased.
   * @param string $encoding Character encoding name, passed through self::normalizeEncoding()
   *
   * @return string str with all alphabetic characters converted to lowercase
   *                (an empty string for empty input).
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    return \mb_strtolower($str, self::normalizeEncoding($encoding));
  }
5804
5805
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of characters matching \p{Mn} is removed.
   *
   * @param string $s
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5816 7
5817
  /**
   * Make a string uppercase.
   *
   * Uses mb_strtoupper() when self::$support['mbstring'] is true; otherwise the string is
   * cleaned and converted with the table returned by self::case_table().
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      The string being uppercased.
   * @param string $encoding Character encoding name (only used on the mbstring path)
   *
   * @return string str with all alphabetic characters converted to uppercase
   *                (an empty string for empty input).
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    // fallback lookup table, built on first use and kept for later calls
    static $caseTableKeys = null;
    static $caseTableValues = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, self::normalizeEncoding($encoding));
    }

    // fallback

    if ($caseTableKeys === null) {
      $caseTable = self::case_table();
      $caseTableKeys = array_keys($caseTable);
      $caseTableValues = array_values($caseTable);
    }

    return str_replace($caseTableKeys, $caseTableValues, self::clean($str));
  }
5862
5863 1
  /**
   * Translate characters or replace sub-strings.
   *
   * Two-argument form: $from is handed to PHP's strtr() unchanged (replacement pairs).
   * Three-argument form: $from and $to are split with self::str_split(), truncated to the
   * shorter of the two and combined into a character => character map for strtr().
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  The string being translated.
   * @param string|array $from The characters to replace, or the replacement map
   *                           when $to is omitted.
   * @param string|array $to   The replacement characters.
   *
   * @return string A copy of str with all occurrences of each character in from
   *                translated to the corresponding character in to.
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      $fromChars = self::str_split($from);
      $toChars = self::str_split($to);

      $fromCount = count($fromChars);
      $toCount = count($toChars);
      $pairCount = min($fromCount, $toCount);

      // surplus characters of the longer list are ignored
      if ($fromCount !== $pairCount) {
        $fromChars = array_slice($fromChars, 0, $pairCount);
      }
      if ($toCount !== $pairCount) {
        $toChars = array_slice($toChars, 0, $pairCount);
      }

      $from = array_combine($fromChars, $toChars);
    }

    return strtr($str, $from);
  }
5904
5905 2
  /**
   * Return the width of a string, as reported by mb_strwidth() for UTF-8.
   *
   * @param string $s
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5919
5920
  /**
   * Get part of a string.
   *
   * Backends, in order of preference: mb_substr() (self::$support['mbstring']),
   * grapheme_substr() (self::$support['iconv']), and finally an array_slice()
   * over the characters returned by self::split().
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       The string being checked.
   * @param int     $start     The first position used in str.
   * @param int     $length    [optional] The maximum length of the returned string;
   *                           null means self::strlen($str).
   * @param string  $encoding  Character encoding name (only used on the mbstring path);
   *                           a boolean is accepted for old callers and treated as 'UTF-8'.
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string The portion of str specified by the start and length parameters
   *                (an empty string for empty input).
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    $length = (int)($length === null ? self::strlen($str) : $length);

    if (self::$support['mbstring'] === true) {
      // INFO: the boolean check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback: split into characters, take the relevant slice and join it to a string again
    $characters = self::split($str);
    $slice = array_slice($characters, $start, $length);

    return implode('', $slice);
  }
5988 6
5989
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * $main_str is cut with self::substr($main_str, $offset, $length); $str is then cut to the
   * same number of characters before both parts are compared.
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts
   *                                    counting from the end of the string.
   * @param int     $length             The length of the comparison.
   * @param boolean $case_insensitivity If TRUE, self::strcasecmp() is used instead of self::strcmp().
   *
   * @return int
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    $mainPart = self::substr($main_str, $offset, $length);
    $otherPart = self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $otherPart);
    }

    return self::strcmp($mainPart, $otherPart);
  }
6009 18
6010
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack The string to search in
   * @param string   $needle   The substring to search for
   * @param int      $offset   [optional] The offset (in characters) where to start counting
   * @param int|null $length   [optional] <p>
   *                           The maximum length (in characters) after the specified offset
   *                           to search for the substring. null (default) means
   *                           "up to the end of haystack".
   *                           </p>
   *
   * @return int The number of occurrences; 0 if haystack or needle is empty.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return 0;
    }

    // strict null check: an explicit length of 0 must be honoured as well
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      // keep null as null - casting it to 0 would cut the haystack down to an
      // empty string; self::substr() treats null as "until the end"
      if ($length !== null) {
        $length = (int)$length;
      }

      $haystack = (string)self::substr($haystack, $offset, $length);

      // nothing left to search in
      if (!isset($haystack[0])) {
        return 0;
      }
    }

    // init
    self::checkForSupport();

    // explicit encoding, so the result does not depend on mb_internal_encoding()
    return \mb_substr_count($haystack, $needle, 'UTF-8');
  }
6054 2
6055
  /**
   * Replace text within a portion of a string (character based).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, the other arguments are expanded to per-element lists
   * and the method is applied to every element:
   * - a scalar $replacement / $start / $length is repeated for every element
   * - array arguments are truncated to the number of elements in $str
   * - non-integer entries of a $start array become 0
   * - non-integer entries of a $length array become count($str), null entries become 0
   * - a null $length means 0 for every element (pure insertion)
   *
   * If $str is not an array, a null $length means "replace up to the end of the string"
   * and an array $replacement contributes only its first element.
   *
   * @param string|array   $str
   * @param string|array   $replacement
   * @param int|array      $start
   * @param null|int|array $length
   *
   * @return array|string An array if $str is an array, a string otherwise
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = is_int($valueTmp) ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = is_int($valueTmpV2) ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both strings into characters ("s": the dot also matches line breaks)
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      self::checkForSupport();

      $length = \mb_strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // separator first: the legacy "array first" argument order is deprecated since PHP 7.4
    return implode('', $smatches[0]);
  }
6132 1
6133 1
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is uppercased; if that leaves the character unchanged,
   * its lowercase form is used instead.
   *
   * @param string $str
   * @param string $encoding
   *
   * @return string each character's case swapped (an empty string for empty input)
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoding = self::normalizeEncoding($encoding);
    $str = self::clean($str);

    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          // unchanged by uppercasing, so lowercase it instead
          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6168
6169
  /**
   * Alias for "UTF8::to_ascii()".
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Forwarded unchanged to to_ascii()
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
6181
6182 1
  /**
   * Alias for "UTF8::to_latin1()".
   *
   * @param string|array $str Forwarded unchanged to to_latin1()
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6193 1
6194
  /**
   * Alias for "UTF8::to_utf8()".
   *
   * @param string $str Forwarded unchanged to to_utf8()
   *
   * @return string
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6205
6206
  /**
   * Convert a string to ASCII.
   *
   * The input is cleaned first. If it then contains no byte >= 0x80 it is returned as is.
   * Otherwise it is normalized to NFKC and every multi-byte character is transliterated:
   * iconv first, then the "translit_extra" data set, then the first byte of the NFD form
   * (if ASCII), then $subst_chr. A character that still ends up as "?" is finally handed
   * to self::str_transliterate().
   *
   * @param string $s         The input string e.g. a UTF-8 String
   * @param string $subst_chr Replacement for characters that cannot be transliterated
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // no byte >= 0x80: nothing to transliterate
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = \Normalizer::normalize($s, \Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // "s" modifier: without it the dot does not match "\n" and line breaks
    // would silently vanish from the result
    preg_match_all('/./us', $s, $matches);
    $chars = $matches[0];

    /** @noinspection AlterInForeachInspection */
    foreach ($chars as &$c) {

      // single-byte characters are left untouched
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if ($t !== false && is_string($t)) {
          if (!isset($t[0])) {
            // iconv produced nothing: mark as "unknown"
            $t = '?';
          } elseif (isset($t[1])) {
            // strip leading quote / accent marks from multi-byte results
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = \Normalizer::normalize($c, \Normalizer::NFD);

          // keep the first byte if it is ASCII, otherwise substitute
          if ($t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
6285
6286
  /**
   * Alias for "UTF8::to_win1252()".
   *
   * @param string $str Forwarded unchanged to to_win1252()
   *
   * @return array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6297
6298
  /**
   * Alias for "UTF8::to_win1252()".
   *
   * @param string|array $str Forwarded unchanged to to_win1252()
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6309
6310
  /**
6311
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
6312
   *
6313
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
6314
   *
6315
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
6316
   *
6317
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
6318
   *    are followed by any of these:  ("group B")
6319
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
6320
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
6321
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
6322
   * is also a valid unicode character, and will be left unchanged.
6323
   *
6324
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
6325
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
6326
   *
6327
   * @param string|array $str Any string or array.
6328
   *
6329
   * @return string The same string, but UTF8 encoded.
6330
   */
6331
  public static function to_utf8($str)
6332
  {
6333
    if (is_array($str)) {
6334
      foreach ($str as $k => $v) {
6335
        /** @noinspection AlterInForeachInspection */
6336
        $str[$k] = self::to_utf8($v);
6337
      }
6338
6339
      return $str;
6340
    }
6341
6342
    $str = (string)$str;
6343
6344
    if (!isset($str[0])) {
6345
      return $str;
6346
    }
6347
6348
    $max = strlen($str);
6349
    $buf = '';
6350
6351
    /** @noinspection ForeachInvariantsInspection */
6352
    for ($i = 0; $i < $max; $i++) {
6353
      $c1 = $str[$i];
6354
6355
      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already
6356
        $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
6357
        $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
6358
        $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];
6359
6360
        if ($c1 >= "\xc0" & $c1 <= "\xdf") { // looks like 2 bytes UTF8
6361
6362
          if ($c2 >= "\x80" && $c2 <= "\xbf") { // yeah, almost sure it's UTF8 already
6363
            $buf .= $c1 . $c2;
6364
            $i++;
6365
          } else { // not valid UTF8 - convert it
6366
            $cc1 = (chr(ord($c1) / 64) | "\xc0");
6367
            $cc2 = ($c1 & "\x3f") | "\x80";
6368
            $buf .= $cc1 . $cc2;
6369
          }
6370
6371 View Code Duplication
        } elseif ($c1 >= "\xe0" & $c1 <= "\xef") { // looks like 3 bytes UTF8
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6372
6373
          if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf") { // yeah, almost sure it's UTF8 already
6374
            $buf .= $c1 . $c2 . $c3;
6375
            $i += 2;
6376
          } else { // not valid UTF8 - convert it
6377
            $cc1 = (chr(ord($c1) / 64) | "\xc0");
6378
            $cc2 = ($c1 & "\x3f") | "\x80";
6379
            $buf .= $cc1 . $cc2;
6380
          }
6381
6382
        } elseif ($c1 >= "\xf0" & $c1 <= "\xf7") { // looks like 4 bytes UTF8
6383
6384 View Code Duplication
          if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf") { // yeah, almost sure it's UTF8 already
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
6385
            $buf .= $c1 . $c2 . $c3 . $c4;
6386
            $i += 3;
6387
          } else { // not valid UTF8 - convert it
6388
            $cc1 = (chr(ord($c1) / 64) | "\xc0");
6389
            $cc2 = ($c1 & "\x3f") | "\x80";
6390
            $buf .= $cc1 . $cc2;
6391
          }
6392
6393
        } else { // doesn't look like UTF8, but should be converted
6394
          $cc1 = (chr(ord($c1) / 64) | "\xc0");
6395
          $cc2 = (($c1 & "\x3f") | "\x80");
6396
          $buf .= $cc1 . $cc2;
6397
        }
6398
6399
      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion
6400
6401
        $ordC1 = ord($c1);
6402
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
6403
          $buf .= self::$win1252ToUtf8[$ordC1];
6404
        } else {
6405
          $cc1 = (chr($ordC1 / 64) | "\xc0");
6406
          $cc2 = (($c1 & "\x3f") | "\x80");
6407
          $buf .= $cc1 . $cc2;
6408
        }
6409
6410
      } else { // it doesn't need conversion
6411
        $buf .= $c1;
6412
      }
6413
    }
6414
6415
    self::checkForSupport();
6416
6417
    // decode unicode escape sequences
6418
    $buf = preg_replace_callback(
6419
        '/\\\\u([0-9a-f]{4})/i',
6420
        function ($match) {
6421
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
6422
        },
6423
        $buf
6424
    );
6425
6426
    // decode UTF-8 codepoints
6427
    $buf = preg_replace_callback(
6428
        '/&#\d{2,4};/',
6429
        function ($match) {
6430
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
6431
        },
6432
        $buf
6433
    );
6434
6435
    return $buf;
6436
  }
6437
6438
  /**
   * Converts a string, or each element of an array, into "win1252"-encoding.
   *
   * Arrays are processed recursively and keep their keys. An empty string
   * yields ''; anything else is handed to "UTF8::utf8_decode()".
   *
   * @param  string|array $str
   *
   * @return string|array
   */
  protected static function to_win1252($str)
  {
    if (!is_array($str)) {
      $str = (string)$str;

      return isset($str[0]) ? self::utf8_decode($str) : '';
    }

    $converted = array();
    foreach ($str as $key => $value) {
      $converted[$key] = self::to_win1252($value);
    }

    return $converted;
  }
6465
6466
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * The native function could only be used for pure 7-bit (ASCII) strings / chars,
   * but checking for ASCII costs more time than it would save here.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; when omitted (INF) or empty,
   *                         the Unicode separator (\pZ) and "other" (\pC) categories are stripped
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($chars !== INF && $chars) {
      // explicit character list: trim the left side first, then the right side
      $leftTrimmed = self::ltrim($str, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6494
6495
  /**
   * Makes the first character of a string uppercase.
   *
   * @param    string $str The input string
   *
   * @return   string The input with its first character upper-cased
   */
  public static function ucfirst($str)
  {
    $head = self::strtoupper(self::substr($str, 0, 1));
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
6506
6507
  /**
   * alias for "UTF8::ucfirst"
   *
   * @param string $str The input string
   *
   * @return string The result of "UTF8::ucfirst()" for the input
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
6518
6519 1
  /**
   * Uppercase the first character of all words in the string.
   *
   * Words are the pieces between single spaces (" "). A word that is listed in
   * $exceptions (strict comparison) is left untouched; the first character of the
   * joined result is upper-cased in every case.
   *
   * @param  string $str        The input string
   * @param  array  $exceptions Words that must not be changed
   *
   * @return string The converted string, or '' for an empty input
   */
  public static function ucwords($str, $exceptions = array())
  {
    // Cast + isset() instead of "!$str": the string "0" is falsy in PHP and
    // would otherwise be swallowed and returned as ''.
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach (explode(' ', $str) as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
6560
6561
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str The (possibly multiple times) encoded input
   *
   * @return string The decoded string, or '' for an empty input
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // 1. url-decode and rewrite "%uXXXX" escapes as hexadecimal html entities
    $decoded = urldecode($str);
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $decoded);

    // ENT_HTML5 is only requested when running on PHP 5.4 or newer
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // 2. normalise to UTF-8, 3. decode entities, 4. raw-url-decode what the entities produced
    $asUtf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($asUtf8, $flags);
    $rawDecoded = rawurldecode($withoutEntities);

    // 5. repair simple UTF-8 encoding errors in the result
    return (string)self::fix_simple_utf8($rawDecoded);
  }
6601 4
6602 4
  /**
   * Returns the lookup table "urlencoded win1252 byte" -> UTF-8 character.
   *
   * The table covers '%20' up to '%FF' in ascending order. Several entries
   * ('%7F', '%81', '%8D', '%8F', '%90', '%9D', '%A0', '%AD') map to an empty string.
   *
   * '%80' maps to the Euro sign, matching the Windows-1252 code page
   * (the previous backtick value duplicated the '%60' entry).
   *
   * @return array Keys are upper-case percent-escapes, values are UTF-8 strings
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        // 0x20 - 0x2F
        '%20' => ' ', '%21' => '!', '%22' => '"', '%23' => '#',
        '%24' => '$', '%25' => '%', '%26' => '&', '%27' => "'",
        '%28' => '(', '%29' => ')', '%2A' => '*', '%2B' => '+',
        '%2C' => ',', '%2D' => '-', '%2E' => '.', '%2F' => '/',
        // 0x30 - 0x3F
        '%30' => '0', '%31' => '1', '%32' => '2', '%33' => '3',
        '%34' => '4', '%35' => '5', '%36' => '6', '%37' => '7',
        '%38' => '8', '%39' => '9', '%3A' => ':', '%3B' => ';',
        '%3C' => '<', '%3D' => '=', '%3E' => '>', '%3F' => '?',
        // 0x40 - 0x4F
        '%40' => '@', '%41' => 'A', '%42' => 'B', '%43' => 'C',
        '%44' => 'D', '%45' => 'E', '%46' => 'F', '%47' => 'G',
        '%48' => 'H', '%49' => 'I', '%4A' => 'J', '%4B' => 'K',
        '%4C' => 'L', '%4D' => 'M', '%4E' => 'N', '%4F' => 'O',
        // 0x50 - 0x5F
        '%50' => 'P', '%51' => 'Q', '%52' => 'R', '%53' => 'S',
        '%54' => 'T', '%55' => 'U', '%56' => 'V', '%57' => 'W',
        '%58' => 'X', '%59' => 'Y', '%5A' => 'Z', '%5B' => '[',
        '%5C' => '\\', '%5D' => ']', '%5E' => '^', '%5F' => '_',
        // 0x60 - 0x6F
        '%60' => '`', '%61' => 'a', '%62' => 'b', '%63' => 'c',
        '%64' => 'd', '%65' => 'e', '%66' => 'f', '%67' => 'g',
        '%68' => 'h', '%69' => 'i', '%6A' => 'j', '%6B' => 'k',
        '%6C' => 'l', '%6D' => 'm', '%6E' => 'n', '%6F' => 'o',
        // 0x70 - 0x7F
        '%70' => 'p', '%71' => 'q', '%72' => 'r', '%73' => 's',
        '%74' => 't', '%75' => 'u', '%76' => 'v', '%77' => 'w',
        '%78' => 'x', '%79' => 'y', '%7A' => 'z', '%7B' => '{',
        '%7C' => '|', '%7D' => '}', '%7E' => '~', '%7F' => '',
        // 0x80 - 0x8F (Windows-1252 specific range)
        '%80' => '€', '%81' => '', '%82' => '‚', '%83' => 'ƒ',
        '%84' => '„', '%85' => '…', '%86' => '†', '%87' => '‡',
        '%88' => 'ˆ', '%89' => '‰', '%8A' => 'Š', '%8B' => '‹',
        '%8C' => 'Œ', '%8D' => '', '%8E' => 'Ž', '%8F' => '',
        // 0x90 - 0x9F (Windows-1252 specific range)
        '%90' => '', '%91' => '‘', '%92' => '’', '%93' => '“',
        '%94' => '”', '%95' => '•', '%96' => '–', '%97' => '—',
        '%98' => '˜', '%99' => '™', '%9A' => 'š', '%9B' => '›',
        '%9C' => 'œ', '%9D' => '', '%9E' => 'ž', '%9F' => 'Ÿ',
        // 0xA0 - 0xAF
        '%A0' => '', '%A1' => '¡', '%A2' => '¢', '%A3' => '£',
        '%A4' => '¤', '%A5' => '¥', '%A6' => '¦', '%A7' => '§',
        '%A8' => '¨', '%A9' => '©', '%AA' => 'ª', '%AB' => '«',
        '%AC' => '¬', '%AD' => '', '%AE' => '®', '%AF' => '¯',
        // 0xB0 - 0xBF
        '%B0' => '°', '%B1' => '±', '%B2' => '²', '%B3' => '³',
        '%B4' => '´', '%B5' => 'µ', '%B6' => '¶', '%B7' => '·',
        '%B8' => '¸', '%B9' => '¹', '%BA' => 'º', '%BB' => '»',
        '%BC' => '¼', '%BD' => '½', '%BE' => '¾', '%BF' => '¿',
        // 0xC0 - 0xCF
        '%C0' => 'À', '%C1' => 'Á', '%C2' => 'Â', '%C3' => 'Ã',
        '%C4' => 'Ä', '%C5' => 'Å', '%C6' => 'Æ', '%C7' => 'Ç',
        '%C8' => 'È', '%C9' => 'É', '%CA' => 'Ê', '%CB' => 'Ë',
        '%CC' => 'Ì', '%CD' => 'Í', '%CE' => 'Î', '%CF' => 'Ï',
        // 0xD0 - 0xDF
        '%D0' => 'Ð', '%D1' => 'Ñ', '%D2' => 'Ò', '%D3' => 'Ó',
        '%D4' => 'Ô', '%D5' => 'Õ', '%D6' => 'Ö', '%D7' => '×',
        '%D8' => 'Ø', '%D9' => 'Ù', '%DA' => 'Ú', '%DB' => 'Û',
        '%DC' => 'Ü', '%DD' => 'Ý', '%DE' => 'Þ', '%DF' => 'ß',
        // 0xE0 - 0xEF
        '%E0' => 'à', '%E1' => 'á', '%E2' => 'â', '%E3' => 'ã',
        '%E4' => 'ä', '%E5' => 'å', '%E6' => 'æ', '%E7' => 'ç',
        '%E8' => 'è', '%E9' => 'é', '%EA' => 'ê', '%EB' => 'ë',
        '%EC' => 'ì', '%ED' => 'í', '%EE' => 'î', '%EF' => 'ï',
        // 0xF0 - 0xFF
        '%F0' => 'ð', '%F1' => 'ñ', '%F2' => 'ò', '%F3' => 'ó',
        '%F4' => 'ô', '%F5' => 'õ', '%F6' => 'ö', '%F7' => '÷',
        '%F8' => 'ø', '%F9' => 'ù', '%FA' => 'ú', '%FB' => 'û',
        '%FC' => 'ü', '%FD' => 'ý', '%FE' => 'þ', '%FF' => 'ÿ',
    );

    return $array;
  }
6838
6839
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalised with "UTF8::to_utf8()", then the sequences listed
   * in self::$utf8ToWin1252 are replaced, and the result is passed to Xml::utf8_decode().
   *
   * @param string $str
   *
   * @return string The decoded string, or '' for an empty input
   */
  public static function utf8_decode($str)
  {
    // search / replace lists, built once per request from self::$utf8ToWin1252
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $utf8 = self::to_utf8($str);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $utf8);

    return Xml::utf8_decode($mapped);
  }
6869
6870
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native conversion, the sequences listed in self::$cp1252ToUtf8 are
   * replaced, but only when the converted string contains a "\xC2" byte at all.
   *
   * @param string $str
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists, built once per request from self::$cp1252ToUtf8
    static $search = null;
    static $replace = null;

    $encoded = \utf8_encode($str);

    // without a "\xC2" byte there is nothing the replacement table could match
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
6896
6897
  /**
   * fix -> utf8-win1252 chars
   *
   * Use this function if you received an UTF-8 string that was converted from
   * Windows-1252 as if it were ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str
   *
   * @return  string The result of "UTF8::fix_simple_utf8()" for the input
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6914
6915
  /**
   * Returns the class-level table of utf8 whitespace characters (self::$whitespaceTable).
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6929
6930
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. When the string holds more
   * words than allowed, it is cut after the last allowed word, right-trimmed and
   * $strAddOn is appended; otherwise it is returned unchanged.
   *
   * @param  string $str      The input string
   * @param  int    $words    Maximum number of words to keep
   * @param  string $strAddOn Suffix for a shortened string
   *
   * @return string '' for an empty input or a limit below 1
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $words = (int)$words;

    if ($words < 1) {
      return '';
    }

    // optional leading whitespace, then 1..$words "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    $wasShortened = isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0]);

    if ($wasShortened) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
6965
6966
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is split into characters with "UTF8::split()". A single-byte mask
   * ('#' for a break, ' ' for a space, '?' for any other character) is wrapped with
   * the native wordwrap(), and the break positions of the mask are then applied to
   * the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>
   *                      The input string.
   *                      </p>
   * @param int    $width [optional] <p>
   *                      The column width.
   *                      </p>
   * @param string $break [optional] <p>
   *                      The line is broken using the optional
   *                      break parameter.
   *                      </p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string the given string wrapped at the specified column,
   *                or '' when $str or $break is empty.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    $segments = explode($break, $str);

    if (1 === count($segments) && '' === $segments[0]) {
      return '';
    }

    // $glyphs holds the real characters, $mask one ASCII byte per entry of $glyphs
    $glyphs = array();
    $mask = '';

    foreach ($segments as $index => $segment) {

      // every segment but the first was preceded by an existing break
      if ($index > 0) {
        $glyphs[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;

        if (' ' === $glyph) {
          $mask .= ' ';
        } else {
          $mask .= '?';
        }
      }
    }

    $wrapped = '';
    $cursor = 0;   // index of the next unconsumed entry in $glyphs
    $maskPos = -1; // position inside the wrapped mask
    $breakPos = -1;
    $mask = wordwrap($mask, $width, '#', $cut);

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {

      // copy everything in front of this break
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $wrapped .= $glyphs[$cursor];
        unset($glyphs[$cursor]);
        $cursor++;
      }

      // the break replaces an original break or the space it was wrapped at
      if ($break === $glyphs[$cursor] || ' ' === $glyphs[$cursor]) {
        unset($glyphs[$cursor]);
        $cursor++;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $glyphs);
  }
7047
7048
  /**
   * Returns the class-level array of Unicode White Space characters (self::$whitespace).
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7057
7058
}
7059