Completed
Push — master ( 8a7006...68e55d )
by Lars
06:47
created

UTF8::substr()   C

Complexity

Conditions 12
Paths 37

Size

Total Lines 57
Code Lines 26

Duplication

Lines 11
Ratio 19.3 %

Code Coverage

Tests 36
CRAP Score 12.0208

Importance

Changes 9
Bugs 2 Features 1
Metric Value
c 9
b 2
f 1
dl 11
loc 57
ccs 36
cts 38
cp 0.9474
rs 6.62
cc 12
eloc 26
nc 37
nop 5
crap 12.0208

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  protected static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  protected static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  protected static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      'ï»¿'              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
93
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
94
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
96
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  protected static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    // HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  protected static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  protected static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  protected static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
232
   * @var array
233
   */
234
  protected static $brokenUtf8ToUtf8 = array(
235
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
236
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
237
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
238
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
239
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
240
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
241
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
242
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
243
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
244
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
245
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
246
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
247
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
248
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
249
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
250
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
251
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
252
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
253
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
254
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
255
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
256
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
257
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
258
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
259
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
260
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
261
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
262
      'ü'       => 'ü',
263
      'ä'       => 'ä',
264
      'ö'       => 'ö',
265
      'Ö'       => 'Ö',
266
      'ß'       => 'ß',
267
      'Ã '       => 'à',
268
      'á'       => 'á',
269
      'â'       => 'â',
270
      'ã'       => 'ã',
271
      'ù'       => 'ù',
272
      'ú'       => 'ú',
273
      'û'       => 'û',
274
      'Ù'       => 'Ù',
275
      'Ú'       => 'Ú',
276
      'Û'       => 'Û',
277
      'Ü'       => 'Ü',
278
      'ò'       => 'ò',
279
      'ó'       => 'ó',
280
      'ô'       => 'ô',
281
      'è'       => 'è',
282
      'é'       => 'é',
283
      'ê'       => 'ê',
284
      'ë'       => 'ë',
285
      'À'       => 'À',
286
      'Á'       => 'Á',
287
      'Â'       => 'Â',
288
      'Ã'       => 'Ã',
289
      'Ä'       => 'Ä',
290
      'Ã…'       => 'Å',
291
      'Ç'       => 'Ç',
292
      'È'       => 'È',
293
      'É'       => 'É',
294
      'Ê'       => 'Ê',
295
      'Ë'       => 'Ë',
296
      'ÃŒ'       => 'Ì',
297
      'Í'       => 'Í',
298
      'ÃŽ'       => 'Î',
299
      'Ï'       => 'Ï',
300
      'Ñ'       => 'Ñ',
301
      'Ã’'       => 'Ò',
302
      'Ó'       => 'Ó',
303
      'Ô'       => 'Ô',
304
      'Õ'       => 'Õ',
305
      'Ø'       => 'Ø',
306
      'Ã¥'       => 'å',
307
      'æ'       => 'æ',
308
      'ç'       => 'ç',
309
      'ì'       => 'ì',
310
      'í'       => 'í',
311
      'î'       => 'î',
312
      'ï'       => 'ï',
313
      'ð'       => 'ð',
314
      'ñ'       => 'ñ',
315
      'õ'       => 'õ',
316
      'ø'       => 'ø',
317
      'ý'       => 'ý',
318
      'ÿ'       => 'ÿ',
319
      '€'      => '€',
320
  );
321
322
  /**
323
   * @var array
324
   */
325
  protected static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  protected static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  protected static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790 1
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792 1
      'ISO-IR-230',
793 1
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803 1
804
  /**
   * Creates a UTF8 helper instance.
   *
   * Instantiation does nothing except trigger the class-level support
   * check via self::checkForSupport(); no instance state is set here.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
811
812
  /**
   * Fetches one character by position, similar to $str[1] on a byte string.
   *
   * Delegates to self::substr() with a fixed length of one character.
   *
   * @param    string $str A UTF-8 string.
   * @param    int    $pos Position of the character to return.
   *
   * @return   string Single multi-byte character, as returned by self::substr().
   */
  public static function access($str, $pos)
  {
    // Exactly one character is requested, starting at $pos.
    $singleCharLength = 1;

    return self::substr($str, $pos, $singleCharLength);
  }
824
825
  /**
   * Ensures the given string carries the UTF-8 BOM.
   *
   * INFO: When self::string_has_bom() does not report false for the input,
   *       the input is returned unchanged; otherwise self::bom() is prepended.
   *
   * @param    string $str The input string
   *
   * @return   string The string with a BOM
   */
  public static function add_bom_to_string($str)
  {
    // Guard clause: anything other than a strict false means "leave as is".
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
842
843
  /**
   * Provides the UTF-8 Byte Order Mark.
   *
   * @return string The three bytes EF BB BF
   */
  public static function bom()
  {
    $utf8Bom = "\xEF\xBB\xBF";

    return $utf8Bom;
  }
852
853
  /**
   * Alias of UTF8::chr_map(): forwards both arguments unchanged.
   *
   * @param string|array $callback Callback handed on to UTF8::chr_map()
   * @param string       $str      String handed on to UTF8::chr_map()
   *
   * @return array The result of UTF8::chr_map() for the same arguments
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
865
866
  /**
867
   * Returns an array of all lower and upper case UTF-8 encoded characters.
868
   *
869
   * @return   array An array with lower case chars as keys and upper case chars as values.
870
   */
871
  protected static function case_table()
872
  {
873
    static $case = array(
874
875
      // lower => upper
876
      "\xf0\x90\x91\x8f" => "\xf0\x90\x90\xa7",
877
      "\xf0\x90\x91\x8e" => "\xf0\x90\x90\xa6",
878
      "\xf0\x90\x91\x8d" => "\xf0\x90\x90\xa5",
879
      "\xf0\x90\x91\x8c" => "\xf0\x90\x90\xa4",
880
      "\xf0\x90\x91\x8b" => "\xf0\x90\x90\xa3",
881
      "\xf0\x90\x91\x8a" => "\xf0\x90\x90\xa2",
882
      "\xf0\x90\x91\x89" => "\xf0\x90\x90\xa1",
883
      "\xf0\x90\x91\x88" => "\xf0\x90\x90\xa0",
884
      "\xf0\x90\x91\x87" => "\xf0\x90\x90\x9f",
885
      "\xf0\x90\x91\x86" => "\xf0\x90\x90\x9e",
886
      "\xf0\x90\x91\x85" => "\xf0\x90\x90\x9d",
887
      "\xf0\x90\x91\x84" => "\xf0\x90\x90\x9c",
888
      "\xf0\x90\x91\x83" => "\xf0\x90\x90\x9b",
889
      "\xf0\x90\x91\x82" => "\xf0\x90\x90\x9a",
890
      "\xf0\x90\x91\x81" => "\xf0\x90\x90\x99",
891
      "\xf0\x90\x91\x80" => "\xf0\x90\x90\x98",
892
      "\xf0\x90\x90\xbf" => "\xf0\x90\x90\x97",
893
      "\xf0\x90\x90\xbe" => "\xf0\x90\x90\x96",
894
      "\xf0\x90\x90\xbd" => "\xf0\x90\x90\x95",
895
      "\xf0\x90\x90\xbc" => "\xf0\x90\x90\x94",
896
      "\xf0\x90\x90\xbb" => "\xf0\x90\x90\x93",
897
      "\xf0\x90\x90\xba" => "\xf0\x90\x90\x92",
898
      "\xf0\x90\x90\xb9" => "\xf0\x90\x90\x91",
899
      "\xf0\x90\x90\xb8" => "\xf0\x90\x90\x90",
900
      "\xf0\x90\x90\xb7" => "\xf0\x90\x90\x8f",
901
      "\xf0\x90\x90\xb6" => "\xf0\x90\x90\x8e",
902
      "\xf0\x90\x90\xb5" => "\xf0\x90\x90\x8d",
903
      "\xf0\x90\x90\xb4" => "\xf0\x90\x90\x8c",
904
      "\xf0\x90\x90\xb3" => "\xf0\x90\x90\x8b",
905
      "\xf0\x90\x90\xb2" => "\xf0\x90\x90\x8a",
906
      "\xf0\x90\x90\xb1" => "\xf0\x90\x90\x89",
907
      "\xf0\x90\x90\xb0" => "\xf0\x90\x90\x88",
908
      "\xf0\x90\x90\xaf" => "\xf0\x90\x90\x87",
909
      "\xf0\x90\x90\xae" => "\xf0\x90\x90\x86",
910
      "\xf0\x90\x90\xad" => "\xf0\x90\x90\x85",
911
      "\xf0\x90\x90\xac" => "\xf0\x90\x90\x84",
912
      "\xf0\x90\x90\xab" => "\xf0\x90\x90\x83",
913
      "\xf0\x90\x90\xaa" => "\xf0\x90\x90\x82",
914
      "\xf0\x90\x90\xa9" => "\xf0\x90\x90\x81",
915
      "\xf0\x90\x90\xa8" => "\xf0\x90\x90\x80",
916
      "\xef\xbd\x9a"     => "\xef\xbc\xba",
917
      "\xef\xbd\x99"     => "\xef\xbc\xb9",
918
      "\xef\xbd\x98"     => "\xef\xbc\xb8",
919
      "\xef\xbd\x97"     => "\xef\xbc\xb7",
920
      "\xef\xbd\x96"     => "\xef\xbc\xb6",
921
      "\xef\xbd\x95"     => "\xef\xbc\xb5",
922
      "\xef\xbd\x94"     => "\xef\xbc\xb4",
923
      "\xef\xbd\x93"     => "\xef\xbc\xb3",
924
      "\xef\xbd\x92"     => "\xef\xbc\xb2",
925
      "\xef\xbd\x91"     => "\xef\xbc\xb1",
926
      "\xef\xbd\x90"     => "\xef\xbc\xb0",
927
      "\xef\xbd\x8f"     => "\xef\xbc\xaf",
928
      "\xef\xbd\x8e"     => "\xef\xbc\xae",
929
      "\xef\xbd\x8d"     => "\xef\xbc\xad",
930
      "\xef\xbd\x8c"     => "\xef\xbc\xac",
931
      "\xef\xbd\x8b"     => "\xef\xbc\xab",
932
      "\xef\xbd\x8a"     => "\xef\xbc\xaa",
933
      "\xef\xbd\x89"     => "\xef\xbc\xa9",
934
      "\xef\xbd\x88"     => "\xef\xbc\xa8",
935
      "\xef\xbd\x87"     => "\xef\xbc\xa7",
936
      "\xef\xbd\x86"     => "\xef\xbc\xa6",
937
      "\xef\xbd\x85"     => "\xef\xbc\xa5",
938
      "\xef\xbd\x84"     => "\xef\xbc\xa4",
939
      "\xef\xbd\x83"     => "\xef\xbc\xa3",
940
      "\xef\xbd\x82"     => "\xef\xbc\xa2",
941
      "\xef\xbd\x81"     => "\xef\xbc\xa1",
942
      "\xea\x9e\x8c"     => "\xea\x9e\x8b",
943
      "\xea\x9e\x87"     => "\xea\x9e\x86",
944
      "\xea\x9e\x85"     => "\xea\x9e\x84",
945
      "\xea\x9e\x83"     => "\xea\x9e\x82",
946
      "\xea\x9e\x81"     => "\xea\x9e\x80",
947
      "\xea\x9d\xbf"     => "\xea\x9d\xbe",
948
      "\xea\x9d\xbc"     => "\xea\x9d\xbb",
949
      "\xea\x9d\xba"     => "\xea\x9d\xb9",
950
      "\xea\x9d\xaf"     => "\xea\x9d\xae",
951
      "\xea\x9d\xad"     => "\xea\x9d\xac",
952
      "\xea\x9d\xab"     => "\xea\x9d\xaa",
953
      "\xea\x9d\xa9"     => "\xea\x9d\xa8",
954
      "\xea\x9d\xa7"     => "\xea\x9d\xa6",
955
      "\xea\x9d\xa5"     => "\xea\x9d\xa4",
956
      "\xea\x9d\xa3"     => "\xea\x9d\xa2",
957
      "\xea\x9d\xa1"     => "\xea\x9d\xa0",
958
      "\xea\x9d\x9f"     => "\xea\x9d\x9e",
959
      "\xea\x9d\x9d"     => "\xea\x9d\x9c",
960
      "\xea\x9d\x9b"     => "\xea\x9d\x9a",
961
      "\xea\x9d\x99"     => "\xea\x9d\x98",
962
      "\xea\x9d\x97"     => "\xea\x9d\x96",
963
      "\xea\x9d\x95"     => "\xea\x9d\x94",
964
      "\xea\x9d\x93"     => "\xea\x9d\x92",
965
      "\xea\x9d\x91"     => "\xea\x9d\x90",
966
      "\xea\x9d\x8f"     => "\xea\x9d\x8e",
967
      "\xea\x9d\x8d"     => "\xea\x9d\x8c",
968
      "\xea\x9d\x8b"     => "\xea\x9d\x8a",
969
      "\xea\x9d\x89"     => "\xea\x9d\x88",
970
      "\xea\x9d\x87"     => "\xea\x9d\x86",
971
      "\xea\x9d\x85"     => "\xea\x9d\x84",
972
      "\xea\x9d\x83"     => "\xea\x9d\x82",
973
      "\xea\x9d\x81"     => "\xea\x9d\x80",
974
      "\xea\x9c\xbf"     => "\xea\x9c\xbe",
975
      "\xea\x9c\xbd"     => "\xea\x9c\xbc",
976
      "\xea\x9c\xbb"     => "\xea\x9c\xba",
977
      "\xea\x9c\xb9"     => "\xea\x9c\xb8",
978
      "\xea\x9c\xb7"     => "\xea\x9c\xb6",
979
      "\xea\x9c\xb5"     => "\xea\x9c\xb4",
980
      "\xea\x9c\xb3"     => "\xea\x9c\xb2",
981
      "\xea\x9c\xaf"     => "\xea\x9c\xae",
982
      "\xea\x9c\xad"     => "\xea\x9c\xac",
983
      "\xea\x9c\xab"     => "\xea\x9c\xaa",
984
      "\xea\x9c\xa9"     => "\xea\x9c\xa8",
985
      "\xea\x9c\xa7"     => "\xea\x9c\xa6",
986
      "\xea\x9c\xa5"     => "\xea\x9c\xa4",
987
      "\xea\x9c\xa3"     => "\xea\x9c\xa2",
988
      "\xea\x9a\x97"     => "\xea\x9a\x96",
989
      "\xea\x9a\x95"     => "\xea\x9a\x94",
990
      "\xea\x9a\x93"     => "\xea\x9a\x92",
991
      "\xea\x9a\x91"     => "\xea\x9a\x90",
992
      "\xea\x9a\x8f"     => "\xea\x9a\x8e",
993
      "\xea\x9a\x8d"     => "\xea\x9a\x8c",
994
      "\xea\x9a\x8b"     => "\xea\x9a\x8a",
995
      "\xea\x9a\x89"     => "\xea\x9a\x88",
996
      "\xea\x9a\x87"     => "\xea\x9a\x86",
997
      "\xea\x9a\x85"     => "\xea\x9a\x84",
998
      "\xea\x9a\x83"     => "\xea\x9a\x82",
999
      "\xea\x9a\x81"     => "\xea\x9a\x80",
1000
      "\xea\x99\xad"     => "\xea\x99\xac",
1001
      "\xea\x99\xab"     => "\xea\x99\xaa",
1002
      "\xea\x99\xa9"     => "\xea\x99\xa8",
1003
      "\xea\x99\xa7"     => "\xea\x99\xa6",
1004
      "\xea\x99\xa5"     => "\xea\x99\xa4",
1005
      "\xea\x99\xa3"     => "\xea\x99\xa2",
1006
      "\xea\x99\x9f"     => "\xea\x99\x9e",
1007
      "\xea\x99\x9d"     => "\xea\x99\x9c",
1008
      "\xea\x99\x9b"     => "\xea\x99\x9a",
1009
      "\xea\x99\x99"     => "\xea\x99\x98",
1010
      "\xea\x99\x97"     => "\xea\x99\x96",
1011
      "\xea\x99\x95"     => "\xea\x99\x94",
1012
      "\xea\x99\x93"     => "\xea\x99\x92",
1013
      "\xea\x99\x91"     => "\xea\x99\x90",
1014
      "\xea\x99\x8f"     => "\xea\x99\x8e",
1015
      "\xea\x99\x8d"     => "\xea\x99\x8c",
1016
      "\xea\x99\x8b"     => "\xea\x99\x8a",
1017
      "\xea\x99\x89"     => "\xea\x99\x88",
1018
      "\xea\x99\x87"     => "\xea\x99\x86",
1019
      "\xea\x99\x85"     => "\xea\x99\x84",
1020
      "\xea\x99\x83"     => "\xea\x99\x82",
1021
      "\xea\x99\x81"     => "\xea\x99\x80",
1022
      "\xe2\xb4\xa5"     => "\xe1\x83\x85",
1023
      "\xe2\xb4\xa4"     => "\xe1\x83\x84",
1024
      "\xe2\xb4\xa3"     => "\xe1\x83\x83",
1025
      "\xe2\xb4\xa2"     => "\xe1\x83\x82",
1026
      "\xe2\xb4\xa1"     => "\xe1\x83\x81",
1027
      "\xe2\xb4\xa0"     => "\xe1\x83\x80",
1028
      "\xe2\xb4\x9f"     => "\xe1\x82\xbf",
1029
      "\xe2\xb4\x9e"     => "\xe1\x82\xbe",
1030
      "\xe2\xb4\x9d"     => "\xe1\x82\xbd",
1031
      "\xe2\xb4\x9c"     => "\xe1\x82\xbc",
1032
      "\xe2\xb4\x9b"     => "\xe1\x82\xbb",
1033
      "\xe2\xb4\x9a"     => "\xe1\x82\xba",
1034
      "\xe2\xb4\x99"     => "\xe1\x82\xb9",
1035
      "\xe2\xb4\x98"     => "\xe1\x82\xb8",
1036
      "\xe2\xb4\x97"     => "\xe1\x82\xb7",
1037
      "\xe2\xb4\x96"     => "\xe1\x82\xb6",
1038
      "\xe2\xb4\x95"     => "\xe1\x82\xb5",
1039
      "\xe2\xb4\x94"     => "\xe1\x82\xb4",
1040
      "\xe2\xb4\x93"     => "\xe1\x82\xb3",
1041
      "\xe2\xb4\x92"     => "\xe1\x82\xb2",
1042
      "\xe2\xb4\x91"     => "\xe1\x82\xb1",
1043
      "\xe2\xb4\x90"     => "\xe1\x82\xb0",
1044
      "\xe2\xb4\x8f"     => "\xe1\x82\xaf",
1045
      "\xe2\xb4\x8e"     => "\xe1\x82\xae",
1046
      "\xe2\xb4\x8d"     => "\xe1\x82\xad",
1047
      "\xe2\xb4\x8c"     => "\xe1\x82\xac",
1048
      "\xe2\xb4\x8b"     => "\xe1\x82\xab",
1049
      "\xe2\xb4\x8a"     => "\xe1\x82\xaa",
1050
      "\xe2\xb4\x89"     => "\xe1\x82\xa9",
1051
      "\xe2\xb4\x88"     => "\xe1\x82\xa8",
1052
      "\xe2\xb4\x87"     => "\xe1\x82\xa7",
1053
      "\xe2\xb4\x86"     => "\xe1\x82\xa6",
1054
      "\xe2\xb4\x85"     => "\xe1\x82\xa5",
1055
      "\xe2\xb4\x84"     => "\xe1\x82\xa4",
1056
      "\xe2\xb4\x83"     => "\xe1\x82\xa3",
1057
      "\xe2\xb4\x82"     => "\xe1\x82\xa2",
1058
      "\xe2\xb4\x81"     => "\xe1\x82\xa1",
1059
      "\xe2\xb4\x80"     => "\xe1\x82\xa0",
1060
      "\xe2\xb3\xae"     => "\xe2\xb3\xad",
1061
      "\xe2\xb3\xac"     => "\xe2\xb3\xab",
1062
      "\xe2\xb3\xa3"     => "\xe2\xb3\xa2",
1063
      "\xe2\xb3\xa1"     => "\xe2\xb3\xa0",
1064
      "\xe2\xb3\x9f"     => "\xe2\xb3\x9e",
1065
      "\xe2\xb3\x9d"     => "\xe2\xb3\x9c",
1066
      "\xe2\xb3\x9b"     => "\xe2\xb3\x9a",
1067
      "\xe2\xb3\x99"     => "\xe2\xb3\x98",
1068
      "\xe2\xb3\x97"     => "\xe2\xb3\x96",
1069
      "\xe2\xb3\x95"     => "\xe2\xb3\x94",
1070
      "\xe2\xb3\x93"     => "\xe2\xb3\x92",
1071
      "\xe2\xb3\x91"     => "\xe2\xb3\x90",
1072
      "\xe2\xb3\x8f"     => "\xe2\xb3\x8e",
1073
      "\xe2\xb3\x8d"     => "\xe2\xb3\x8c",
1074
      "\xe2\xb3\x8b"     => "\xe2\xb3\x8a",
1075
      "\xe2\xb3\x89"     => "\xe2\xb3\x88",
1076
      "\xe2\xb3\x87"     => "\xe2\xb3\x86",
1077
      "\xe2\xb3\x85"     => "\xe2\xb3\x84",
1078
      "\xe2\xb3\x83"     => "\xe2\xb3\x82",
1079
      "\xe2\xb3\x81"     => "\xe2\xb3\x80",
1080
      "\xe2\xb2\xbf"     => "\xe2\xb2\xbe",
1081
      "\xe2\xb2\xbd"     => "\xe2\xb2\xbc",
1082
      "\xe2\xb2\xbb"     => "\xe2\xb2\xba",
1083
      "\xe2\xb2\xb9"     => "\xe2\xb2\xb8",
1084
      "\xe2\xb2\xb7"     => "\xe2\xb2\xb6",
1085
      "\xe2\xb2\xb5"     => "\xe2\xb2\xb4",
1086
      "\xe2\xb2\xb3"     => "\xe2\xb2\xb2",
1087
      "\xe2\xb2\xb1"     => "\xe2\xb2\xb0",
1088
      "\xe2\xb2\xaf"     => "\xe2\xb2\xae",
1089
      "\xe2\xb2\xad"     => "\xe2\xb2\xac",
1090
      "\xe2\xb2\xab"     => "\xe2\xb2\xaa",
1091
      "\xe2\xb2\xa9"     => "\xe2\xb2\xa8",
1092
      "\xe2\xb2\xa7"     => "\xe2\xb2\xa6",
1093
      "\xe2\xb2\xa5"     => "\xe2\xb2\xa4",
1094
      "\xe2\xb2\xa3"     => "\xe2\xb2\xa2",
1095
      "\xe2\xb2\xa1"     => "\xe2\xb2\xa0",
1096
      "\xe2\xb2\x9f"     => "\xe2\xb2\x9e",
1097
      "\xe2\xb2\x9d"     => "\xe2\xb2\x9c",
1098
      "\xe2\xb2\x9b"     => "\xe2\xb2\x9a",
1099
      "\xe2\xb2\x99"     => "\xe2\xb2\x98",
1100
      "\xe2\xb2\x97"     => "\xe2\xb2\x96",
1101
      "\xe2\xb2\x95"     => "\xe2\xb2\x94",
1102
      "\xe2\xb2\x93"     => "\xe2\xb2\x92",
1103
      "\xe2\xb2\x91"     => "\xe2\xb2\x90",
1104
      "\xe2\xb2\x8f"     => "\xe2\xb2\x8e",
1105
      "\xe2\xb2\x8d"     => "\xe2\xb2\x8c",
1106
      "\xe2\xb2\x8b"     => "\xe2\xb2\x8a",
1107
      "\xe2\xb2\x89"     => "\xe2\xb2\x88",
1108
      "\xe2\xb2\x87"     => "\xe2\xb2\x86",
1109
      "\xe2\xb2\x85"     => "\xe2\xb2\x84",
1110
      "\xe2\xb2\x83"     => "\xe2\xb2\x82",
1111
      "\xe2\xb2\x81"     => "\xe2\xb2\x80",
1112
      "\xe2\xb1\xb6"     => "\xe2\xb1\xb5",
1113
      "\xe2\xb1\xb3"     => "\xe2\xb1\xb2",
1114
      "\xe2\xb1\xac"     => "\xe2\xb1\xab",
1115
      "\xe2\xb1\xaa"     => "\xe2\xb1\xa9",
1116
      "\xe2\xb1\xa8"     => "\xe2\xb1\xa7",
1117
      "\xe2\xb1\xa6"     => "\xc8\xbe",
1118
      "\xe2\xb1\xa5"     => "\xc8\xba",
1119
      "\xe2\xb1\xa1"     => "\xe2\xb1\xa0",
1120
      "\xe2\xb1\x9e"     => "\xe2\xb0\xae",
1121
      "\xe2\xb1\x9d"     => "\xe2\xb0\xad",
1122
      "\xe2\xb1\x9c"     => "\xe2\xb0\xac",
1123
      "\xe2\xb1\x9b"     => "\xe2\xb0\xab",
1124
      "\xe2\xb1\x9a"     => "\xe2\xb0\xaa",
1125
      "\xe2\xb1\x99"     => "\xe2\xb0\xa9",
1126
      "\xe2\xb1\x98"     => "\xe2\xb0\xa8",
1127
      "\xe2\xb1\x97"     => "\xe2\xb0\xa7",
1128
      "\xe2\xb1\x96"     => "\xe2\xb0\xa6",
1129
      "\xe2\xb1\x95"     => "\xe2\xb0\xa5",
1130
      "\xe2\xb1\x94"     => "\xe2\xb0\xa4",
1131
      "\xe2\xb1\x93"     => "\xe2\xb0\xa3",
1132
      "\xe2\xb1\x92"     => "\xe2\xb0\xa2",
1133
      "\xe2\xb1\x91"     => "\xe2\xb0\xa1",
1134
      "\xe2\xb1\x90"     => "\xe2\xb0\xa0",
1135
      "\xe2\xb1\x8f"     => "\xe2\xb0\x9f",
1136
      "\xe2\xb1\x8e"     => "\xe2\xb0\x9e",
1137
      "\xe2\xb1\x8d"     => "\xe2\xb0\x9d",
1138
      "\xe2\xb1\x8c"     => "\xe2\xb0\x9c",
1139
      "\xe2\xb1\x8b"     => "\xe2\xb0\x9b",
1140
      "\xe2\xb1\x8a"     => "\xe2\xb0\x9a",
1141
      "\xe2\xb1\x89"     => "\xe2\xb0\x99",
1142
      "\xe2\xb1\x88"     => "\xe2\xb0\x98",
1143
      "\xe2\xb1\x87"     => "\xe2\xb0\x97",
1144
      "\xe2\xb1\x86"     => "\xe2\xb0\x96",
1145
      "\xe2\xb1\x85"     => "\xe2\xb0\x95",
1146
      "\xe2\xb1\x84"     => "\xe2\xb0\x94",
1147
      "\xe2\xb1\x83"     => "\xe2\xb0\x93",
1148
      "\xe2\xb1\x82"     => "\xe2\xb0\x92",
1149
      "\xe2\xb1\x81"     => "\xe2\xb0\x91",
1150
      "\xe2\xb1\x80"     => "\xe2\xb0\x90",
1151
      "\xe2\xb0\xbf"     => "\xe2\xb0\x8f",
1152
      "\xe2\xb0\xbe"     => "\xe2\xb0\x8e",
1153
      "\xe2\xb0\xbd"     => "\xe2\xb0\x8d",
1154
      "\xe2\xb0\xbc"     => "\xe2\xb0\x8c",
1155
      "\xe2\xb0\xbb"     => "\xe2\xb0\x8b",
1156
      "\xe2\xb0\xba"     => "\xe2\xb0\x8a",
1157
      "\xe2\xb0\xb9"     => "\xe2\xb0\x89",
1158
      "\xe2\xb0\xb8"     => "\xe2\xb0\x88",
1159
      "\xe2\xb0\xb7"     => "\xe2\xb0\x87",
1160
      "\xe2\xb0\xb6"     => "\xe2\xb0\x86",
1161
      "\xe2\xb0\xb5"     => "\xe2\xb0\x85",
1162
      "\xe2\xb0\xb4"     => "\xe2\xb0\x84",
1163
      "\xe2\xb0\xb3"     => "\xe2\xb0\x83",
1164
      "\xe2\xb0\xb2"     => "\xe2\xb0\x82",
1165
      "\xe2\xb0\xb1"     => "\xe2\xb0\x81",
1166
      "\xe2\xb0\xb0"     => "\xe2\xb0\x80",
1167
      "\xe2\x86\x84"     => "\xe2\x86\x83",
1168
      "\xe2\x85\x8e"     => "\xe2\x84\xb2",
1169
      "\xe1\xbf\xb3"     => "\xe1\xbf\xbc",
1170
      "\xe1\xbf\xa5"     => "\xe1\xbf\xac",
1171
      "\xe1\xbf\xa1"     => "\xe1\xbf\xa9",
1172
      "\xe1\xbf\xa0"     => "\xe1\xbf\xa8",
1173
      "\xe1\xbf\x91"     => "\xe1\xbf\x99",
1174
      "\xe1\xbf\x90"     => "\xe1\xbf\x98",
1175
      "\xe1\xbf\x83"     => "\xe1\xbf\x8c",
1176
      "\xe1\xbe\xbe"     => "\xce\x99",
1177
      "\xe1\xbe\xb3"     => "\xe1\xbe\xbc",
1178
      "\xe1\xbe\xb1"     => "\xe1\xbe\xb9",
1179
      "\xe1\xbe\xb0"     => "\xe1\xbe\xb8",
1180
      "\xe1\xbe\xa7"     => "\xe1\xbe\xaf",
1181
      "\xe1\xbe\xa6"     => "\xe1\xbe\xae",
1182
      "\xe1\xbe\xa5"     => "\xe1\xbe\xad",
1183
      "\xe1\xbe\xa4"     => "\xe1\xbe\xac",
1184
      "\xe1\xbe\xa3"     => "\xe1\xbe\xab",
1185
      "\xe1\xbe\xa2"     => "\xe1\xbe\xaa",
1186
      "\xe1\xbe\xa1"     => "\xe1\xbe\xa9",
1187
      "\xe1\xbe\xa0"     => "\xe1\xbe\xa8",
1188
      "\xe1\xbe\x97"     => "\xe1\xbe\x9f",
1189
      "\xe1\xbe\x96"     => "\xe1\xbe\x9e",
1190
      "\xe1\xbe\x95"     => "\xe1\xbe\x9d",
1191
      "\xe1\xbe\x94"     => "\xe1\xbe\x9c",
1192
      "\xe1\xbe\x93"     => "\xe1\xbe\x9b",
1193
      "\xe1\xbe\x92"     => "\xe1\xbe\x9a",
1194
      "\xe1\xbe\x91"     => "\xe1\xbe\x99",
1195
      "\xe1\xbe\x90"     => "\xe1\xbe\x98",
1196
      "\xe1\xbe\x87"     => "\xe1\xbe\x8f",
1197
      "\xe1\xbe\x86"     => "\xe1\xbe\x8e",
1198
      "\xe1\xbe\x85"     => "\xe1\xbe\x8d",
1199
      "\xe1\xbe\x84"     => "\xe1\xbe\x8c",
1200
      "\xe1\xbe\x83"     => "\xe1\xbe\x8b",
1201
      "\xe1\xbe\x82"     => "\xe1\xbe\x8a",
1202
      "\xe1\xbe\x81"     => "\xe1\xbe\x89",
1203
      "\xe1\xbe\x80"     => "\xe1\xbe\x88",
1204
      "\xe1\xbd\xbd"     => "\xe1\xbf\xbb",
1205
      "\xe1\xbd\xbc"     => "\xe1\xbf\xba",
1206
      "\xe1\xbd\xbb"     => "\xe1\xbf\xab",
1207
      "\xe1\xbd\xba"     => "\xe1\xbf\xaa",
1208
      "\xe1\xbd\xb9"     => "\xe1\xbf\xb9",
1209
      "\xe1\xbd\xb8"     => "\xe1\xbf\xb8",
1210
      "\xe1\xbd\xb7"     => "\xe1\xbf\x9b",
1211
      "\xe1\xbd\xb6"     => "\xe1\xbf\x9a",
1212
      "\xe1\xbd\xb5"     => "\xe1\xbf\x8b",
1213
      "\xe1\xbd\xb4"     => "\xe1\xbf\x8a",
1214
      "\xe1\xbd\xb3"     => "\xe1\xbf\x89",
1215
      "\xe1\xbd\xb2"     => "\xe1\xbf\x88",
1216
      "\xe1\xbd\xb1"     => "\xe1\xbe\xbb",
1217
      "\xe1\xbd\xb0"     => "\xe1\xbe\xba",
1218
      "\xe1\xbd\xa7"     => "\xe1\xbd\xaf",
1219
      "\xe1\xbd\xa6"     => "\xe1\xbd\xae",
1220
      "\xe1\xbd\xa5"     => "\xe1\xbd\xad",
1221
      "\xe1\xbd\xa4"     => "\xe1\xbd\xac",
1222
      "\xe1\xbd\xa3"     => "\xe1\xbd\xab",
1223
      "\xe1\xbd\xa2"     => "\xe1\xbd\xaa",
1224
      "\xe1\xbd\xa1"     => "\xe1\xbd\xa9",
1225
      "\xe1\xbd\xa0"     => "\xe1\xbd\xa8",
1226
      "\xe1\xbd\x97"     => "\xe1\xbd\x9f",
1227
      "\xe1\xbd\x95"     => "\xe1\xbd\x9d",
1228
      "\xe1\xbd\x93"     => "\xe1\xbd\x9b",
1229
      "\xe1\xbd\x91"     => "\xe1\xbd\x99",
1230
      "\xe1\xbd\x85"     => "\xe1\xbd\x8d",
1231
      "\xe1\xbd\x84"     => "\xe1\xbd\x8c",
1232
      "\xe1\xbd\x83"     => "\xe1\xbd\x8b",
1233
      "\xe1\xbd\x82"     => "\xe1\xbd\x8a",
1234
      "\xe1\xbd\x81"     => "\xe1\xbd\x89",
1235
      "\xe1\xbd\x80"     => "\xe1\xbd\x88",
1236
      "\xe1\xbc\xb7"     => "\xe1\xbc\xbf",
1237
      "\xe1\xbc\xb6"     => "\xe1\xbc\xbe",
1238
      "\xe1\xbc\xb5"     => "\xe1\xbc\xbd",
1239
      "\xe1\xbc\xb4"     => "\xe1\xbc\xbc",
1240
      "\xe1\xbc\xb3"     => "\xe1\xbc\xbb",
1241
      "\xe1\xbc\xb2"     => "\xe1\xbc\xba",
1242
      "\xe1\xbc\xb1"     => "\xe1\xbc\xb9",
1243
      "\xe1\xbc\xb0"     => "\xe1\xbc\xb8",
1244
      "\xe1\xbc\xa7"     => "\xe1\xbc\xaf",
1245
      "\xe1\xbc\xa6"     => "\xe1\xbc\xae",
1246
      "\xe1\xbc\xa5"     => "\xe1\xbc\xad",
1247
      "\xe1\xbc\xa4"     => "\xe1\xbc\xac",
1248
      "\xe1\xbc\xa3"     => "\xe1\xbc\xab",
1249
      "\xe1\xbc\xa2"     => "\xe1\xbc\xaa",
1250
      "\xe1\xbc\xa1"     => "\xe1\xbc\xa9",
1251
      "\xe1\xbc\xa0"     => "\xe1\xbc\xa8",
1252
      "\xe1\xbc\x95"     => "\xe1\xbc\x9d",
1253
      "\xe1\xbc\x94"     => "\xe1\xbc\x9c",
1254
      "\xe1\xbc\x93"     => "\xe1\xbc\x9b",
1255
      "\xe1\xbc\x92"     => "\xe1\xbc\x9a",
1256
      "\xe1\xbc\x91"     => "\xe1\xbc\x99",
1257
      "\xe1\xbc\x90"     => "\xe1\xbc\x98",
1258
      "\xe1\xbc\x87"     => "\xe1\xbc\x8f",
1259
      "\xe1\xbc\x86"     => "\xe1\xbc\x8e",
1260
      "\xe1\xbc\x85"     => "\xe1\xbc\x8d",
1261
      "\xe1\xbc\x84"     => "\xe1\xbc\x8c",
1262
      "\xe1\xbc\x83"     => "\xe1\xbc\x8b",
1263
      "\xe1\xbc\x82"     => "\xe1\xbc\x8a",
1264
      "\xe1\xbc\x81"     => "\xe1\xbc\x89",
1265
      "\xe1\xbc\x80"     => "\xe1\xbc\x88",
1266
      "\xe1\xbb\xbf"     => "\xe1\xbb\xbe",
1267
      "\xe1\xbb\xbd"     => "\xe1\xbb\xbc",
1268
      "\xe1\xbb\xbb"     => "\xe1\xbb\xba",
1269
      "\xe1\xbb\xb9"     => "\xe1\xbb\xb8",
1270
      "\xe1\xbb\xb7"     => "\xe1\xbb\xb6",
1271
      "\xe1\xbb\xb5"     => "\xe1\xbb\xb4",
1272
      "\xe1\xbb\xb3"     => "\xe1\xbb\xb2",
1273
      "\xe1\xbb\xb1"     => "\xe1\xbb\xb0",
1274
      "\xe1\xbb\xaf"     => "\xe1\xbb\xae",
1275
      "\xe1\xbb\xad"     => "\xe1\xbb\xac",
1276
      "\xe1\xbb\xab"     => "\xe1\xbb\xaa",
1277
      "\xe1\xbb\xa9"     => "\xe1\xbb\xa8",
1278
      "\xe1\xbb\xa7"     => "\xe1\xbb\xa6",
1279
      "\xe1\xbb\xa5"     => "\xe1\xbb\xa4",
1280
      "\xe1\xbb\xa3"     => "\xe1\xbb\xa2",
1281
      "\xe1\xbb\xa1"     => "\xe1\xbb\xa0",
1282
      "\xe1\xbb\x9f"     => "\xe1\xbb\x9e",
1283
      "\xe1\xbb\x9d"     => "\xe1\xbb\x9c",
1284
      "\xe1\xbb\x9b"     => "\xe1\xbb\x9a",
1285
      "\xe1\xbb\x99"     => "\xe1\xbb\x98",
1286
      "\xe1\xbb\x97"     => "\xe1\xbb\x96",
1287
      "\xe1\xbb\x95"     => "\xe1\xbb\x94",
1288
      "\xe1\xbb\x93"     => "\xe1\xbb\x92",
1289
      "\xe1\xbb\x91"     => "\xe1\xbb\x90",
1290
      "\xe1\xbb\x8f"     => "\xe1\xbb\x8e",
1291
      "\xe1\xbb\x8d"     => "\xe1\xbb\x8c",
1292
      "\xe1\xbb\x8b"     => "\xe1\xbb\x8a",
1293
      "\xe1\xbb\x89"     => "\xe1\xbb\x88",
1294
      "\xe1\xbb\x87"     => "\xe1\xbb\x86",
1295
      "\xe1\xbb\x85"     => "\xe1\xbb\x84",
1296
      "\xe1\xbb\x83"     => "\xe1\xbb\x82",
1297
      "\xe1\xbb\x81"     => "\xe1\xbb\x80",
1298
      "\xe1\xba\xbf"     => "\xe1\xba\xbe",
1299
      "\xe1\xba\xbd"     => "\xe1\xba\xbc",
1300
      "\xe1\xba\xbb"     => "\xe1\xba\xba",
1301
      "\xe1\xba\xb9"     => "\xe1\xba\xb8",
1302
      "\xe1\xba\xb7"     => "\xe1\xba\xb6",
1303
      "\xe1\xba\xb5"     => "\xe1\xba\xb4",
1304
      "\xe1\xba\xb3"     => "\xe1\xba\xb2",
1305
      "\xe1\xba\xb1"     => "\xe1\xba\xb0",
1306
      "\xe1\xba\xaf"     => "\xe1\xba\xae",
1307
      "\xe1\xba\xad"     => "\xe1\xba\xac",
1308
      "\xe1\xba\xab"     => "\xe1\xba\xaa",
1309
      "\xe1\xba\xa9"     => "\xe1\xba\xa8",
1310
      "\xe1\xba\xa7"     => "\xe1\xba\xa6",
1311
      "\xe1\xba\xa5"     => "\xe1\xba\xa4",
1312
      "\xe1\xba\xa3"     => "\xe1\xba\xa2",
1313
      "\xe1\xba\xa1"     => "\xe1\xba\xa0",
1314
      "\xe1\xba\x9b"     => "\xe1\xb9\xa0",
1315
      "\xe1\xba\x95"     => "\xe1\xba\x94",
1316
      "\xe1\xba\x93"     => "\xe1\xba\x92",
1317
      "\xe1\xba\x91"     => "\xe1\xba\x90",
1318
      "\xe1\xba\x8f"     => "\xe1\xba\x8e",
1319
      "\xe1\xba\x8d"     => "\xe1\xba\x8c",
1320
      "\xe1\xba\x8b"     => "\xe1\xba\x8a",
1321
      "\xe1\xba\x89"     => "\xe1\xba\x88",
1322
      "\xe1\xba\x87"     => "\xe1\xba\x86",
1323
      "\xe1\xba\x85"     => "\xe1\xba\x84",
1324
      "\xe1\xba\x83"     => "\xe1\xba\x82",
1325
      "\xe1\xba\x81"     => "\xe1\xba\x80",
1326
      "\xe1\xb9\xbf"     => "\xe1\xb9\xbe",
1327
      "\xe1\xb9\xbd"     => "\xe1\xb9\xbc",
1328
      "\xe1\xb9\xbb"     => "\xe1\xb9\xba",
1329
      "\xe1\xb9\xb9"     => "\xe1\xb9\xb8",
1330
      "\xe1\xb9\xb7"     => "\xe1\xb9\xb6",
1331
      "\xe1\xb9\xb5"     => "\xe1\xb9\xb4",
1332
      "\xe1\xb9\xb3"     => "\xe1\xb9\xb2",
1333
      "\xe1\xb9\xb1"     => "\xe1\xb9\xb0",
1334
      "\xe1\xb9\xaf"     => "\xe1\xb9\xae",
1335
      "\xe1\xb9\xad"     => "\xe1\xb9\xac",
1336
      "\xe1\xb9\xab"     => "\xe1\xb9\xaa",
1337
      "\xe1\xb9\xa9"     => "\xe1\xb9\xa8",
1338
      "\xe1\xb9\xa7"     => "\xe1\xb9\xa6",
1339
      "\xe1\xb9\xa5"     => "\xe1\xb9\xa4",
1340
      "\xe1\xb9\xa3"     => "\xe1\xb9\xa2",
1341
      "\xe1\xb9\xa1"     => "\xe1\xb9\xa0",
1342
      "\xe1\xb9\x9f"     => "\xe1\xb9\x9e",
1343
      "\xe1\xb9\x9d"     => "\xe1\xb9\x9c",
1344
      "\xe1\xb9\x9b"     => "\xe1\xb9\x9a",
1345
      "\xe1\xb9\x99"     => "\xe1\xb9\x98",
1346
      "\xe1\xb9\x97"     => "\xe1\xb9\x96",
1347
      "\xe1\xb9\x95"     => "\xe1\xb9\x94",
1348
      "\xe1\xb9\x93"     => "\xe1\xb9\x92",
1349
      "\xe1\xb9\x91"     => "\xe1\xb9\x90",
1350
      "\xe1\xb9\x8f"     => "\xe1\xb9\x8e",
1351
      "\xe1\xb9\x8d"     => "\xe1\xb9\x8c",
1352
      "\xe1\xb9\x8b"     => "\xe1\xb9\x8a",
1353
      "\xe1\xb9\x89"     => "\xe1\xb9\x88",
1354
      "\xe1\xb9\x87"     => "\xe1\xb9\x86",
1355
      "\xe1\xb9\x85"     => "\xe1\xb9\x84",
1356
      "\xe1\xb9\x83"     => "\xe1\xb9\x82",
1357
      "\xe1\xb9\x81"     => "\xe1\xb9\x80",
1358
      "\xe1\xb8\xbf"     => "\xe1\xb8\xbe",
1359
      "\xe1\xb8\xbd"     => "\xe1\xb8\xbc",
1360
      "\xe1\xb8\xbb"     => "\xe1\xb8\xba",
1361
      "\xe1\xb8\xb9"     => "\xe1\xb8\xb8",
1362
      "\xe1\xb8\xb7"     => "\xe1\xb8\xb6",
1363
      "\xe1\xb8\xb5"     => "\xe1\xb8\xb4",
1364
      "\xe1\xb8\xb3"     => "\xe1\xb8\xb2",
1365
      "\xe1\xb8\xb1"     => "\xe1\xb8\xb0",
1366
      "\xe1\xb8\xaf"     => "\xe1\xb8\xae",
1367
      "\xe1\xb8\xad"     => "\xe1\xb8\xac",
1368
      "\xe1\xb8\xab"     => "\xe1\xb8\xaa",
1369
      "\xe1\xb8\xa9"     => "\xe1\xb8\xa8",
1370
      "\xe1\xb8\xa7"     => "\xe1\xb8\xa6",
1371
      "\xe1\xb8\xa5"     => "\xe1\xb8\xa4",
1372
      "\xe1\xb8\xa3"     => "\xe1\xb8\xa2",
1373
      "\xe1\xb8\xa1"     => "\xe1\xb8\xa0",
1374
      "\xe1\xb8\x9f"     => "\xe1\xb8\x9e",
1375
      "\xe1\xb8\x9d"     => "\xe1\xb8\x9c",
1376
      "\xe1\xb8\x9b"     => "\xe1\xb8\x9a",
1377
      "\xe1\xb8\x99"     => "\xe1\xb8\x98",
1378
      "\xe1\xb8\x97"     => "\xe1\xb8\x96",
1379
      "\xe1\xb8\x95"     => "\xe1\xb8\x94",
1380
      "\xe1\xb8\x93"     => "\xe1\xb8\x92",
1381
      "\xe1\xb8\x91"     => "\xe1\xb8\x90",
1382
      "\xe1\xb8\x8f"     => "\xe1\xb8\x8e",
1383
      "\xe1\xb8\x8d"     => "\xe1\xb8\x8c",
1384
      "\xe1\xb8\x8b"     => "\xe1\xb8\x8a",
1385
      "\xe1\xb8\x89"     => "\xe1\xb8\x88",
1386
      "\xe1\xb8\x87"     => "\xe1\xb8\x86",
1387
      "\xe1\xb8\x85"     => "\xe1\xb8\x84",
1388
      "\xe1\xb8\x83"     => "\xe1\xb8\x82",
1389
      "\xe1\xb8\x81"     => "\xe1\xb8\x80",
1390
      "\xe1\xb5\xbd"     => "\xe2\xb1\xa3",
1391
      "\xe1\xb5\xb9"     => "\xea\x9d\xbd",
1392
      "\xd6\x86"         => "\xd5\x96",
1393
      "\xd6\x85"         => "\xd5\x95",
1394
      "\xd6\x84"         => "\xd5\x94",
1395
      "\xd6\x83"         => "\xd5\x93",
1396
      "\xd6\x82"         => "\xd5\x92",
1397
      "\xd6\x81"         => "\xd5\x91",
1398
      "\xd6\x80"         => "\xd5\x90",
1399
      "\xd5\xbf"         => "\xd5\x8f",
1400
      "\xd5\xbe"         => "\xd5\x8e",
1401
      "\xd5\xbd"         => "\xd5\x8d",
1402
      "\xd5\xbc"         => "\xd5\x8c",
1403
      "\xd5\xbb"         => "\xd5\x8b",
1404
      "\xd5\xba"         => "\xd5\x8a",
1405
      "\xd5\xb9"         => "\xd5\x89",
1406
      "\xd5\xb8"         => "\xd5\x88",
1407
      "\xd5\xb7"         => "\xd5\x87",
1408
      "\xd5\xb6"         => "\xd5\x86",
1409
      "\xd5\xb5"         => "\xd5\x85",
1410
      "\xd5\xb4"         => "\xd5\x84",
1411
      "\xd5\xb3"         => "\xd5\x83",
1412
      "\xd5\xb2"         => "\xd5\x82",
1413
      "\xd5\xb1"         => "\xd5\x81",
1414
      "\xd5\xb0"         => "\xd5\x80",
1415
      "\xd5\xaf"         => "\xd4\xbf",
1416
      "\xd5\xae"         => "\xd4\xbe",
1417
      "\xd5\xad"         => "\xd4\xbd",
1418
      "\xd5\xac"         => "\xd4\xbc",
1419
      "\xd5\xab"         => "\xd4\xbb",
1420
      "\xd5\xaa"         => "\xd4\xba",
1421
      "\xd5\xa9"         => "\xd4\xb9",
1422
      "\xd5\xa8"         => "\xd4\xb8",
1423
      "\xd5\xa7"         => "\xd4\xb7",
1424
      "\xd5\xa6"         => "\xd4\xb6",
1425
      "\xd5\xa5"         => "\xd4\xb5",
1426
      "\xd5\xa4"         => "\xd4\xb4",
1427
      "\xd5\xa3"         => "\xd4\xb3",
1428
      "\xd5\xa2"         => "\xd4\xb2",
1429
      "\xd5\xa1"         => "\xd4\xb1",
1430
      "\xd4\xa5"         => "\xd4\xa4",
1431
      "\xd4\xa3"         => "\xd4\xa2",
1432
      "\xd4\xa1"         => "\xd4\xa0",
1433
      "\xd4\x9f"         => "\xd4\x9e",
1434
      "\xd4\x9d"         => "\xd4\x9c",
1435
      "\xd4\x9b"         => "\xd4\x9a",
1436
      "\xd4\x99"         => "\xd4\x98",
1437
      "\xd4\x97"         => "\xd4\x96",
1438
      "\xd4\x95"         => "\xd4\x94",
1439
      "\xd4\x93"         => "\xd4\x92",
1440
      "\xd4\x91"         => "\xd4\x90",
1441
      "\xd4\x8f"         => "\xd4\x8e",
1442
      "\xd4\x8d"         => "\xd4\x8c",
1443
      "\xd4\x8b"         => "\xd4\x8a",
1444
      "\xd4\x89"         => "\xd4\x88",
1445
      "\xd4\x87"         => "\xd4\x86",
1446
      "\xd4\x85"         => "\xd4\x84",
1447
      "\xd4\x83"         => "\xd4\x82",
1448
      "\xd4\x81"         => "\xd4\x80",
1449
      "\xd3\xbf"         => "\xd3\xbe",
1450
      "\xd3\xbd"         => "\xd3\xbc",
1451
      "\xd3\xbb"         => "\xd3\xba",
1452
      "\xd3\xb9"         => "\xd3\xb8",
1453
      "\xd3\xb7"         => "\xd3\xb6",
1454
      "\xd3\xb5"         => "\xd3\xb4",
1455
      "\xd3\xb3"         => "\xd3\xb2",
1456
      "\xd3\xb1"         => "\xd3\xb0",
1457
      "\xd3\xaf"         => "\xd3\xae",
1458
      "\xd3\xad"         => "\xd3\xac",
1459
      "\xd3\xab"         => "\xd3\xaa",
1460
      "\xd3\xa9"         => "\xd3\xa8",
1461
      "\xd3\xa7"         => "\xd3\xa6",
1462
      "\xd3\xa5"         => "\xd3\xa4",
1463
      "\xd3\xa3"         => "\xd3\xa2",
1464
      "\xd3\xa1"         => "\xd3\xa0",
1465
      "\xd3\x9f"         => "\xd3\x9e",
1466
      "\xd3\x9d"         => "\xd3\x9c",
1467
      "\xd3\x9b"         => "\xd3\x9a",
1468
      "\xd3\x99"         => "\xd3\x98",
1469
      "\xd3\x97"         => "\xd3\x96",
1470
      "\xd3\x95"         => "\xd3\x94",
1471
      "\xd3\x93"         => "\xd3\x92",
1472
      "\xd3\x91"         => "\xd3\x90",
1473
      "\xd3\x8f"         => "\xd3\x80",
1474
      "\xd3\x8e"         => "\xd3\x8d",
1475
      "\xd3\x8c"         => "\xd3\x8b",
1476
      "\xd3\x8a"         => "\xd3\x89",
1477
      "\xd3\x88"         => "\xd3\x87",
1478
      "\xd3\x86"         => "\xd3\x85",
1479
      "\xd3\x84"         => "\xd3\x83",
1480
      "\xd3\x82"         => "\xd3\x81",
1481
      "\xd2\xbf"         => "\xd2\xbe",
1482
      "\xd2\xbd"         => "\xd2\xbc",
1483
      "\xd2\xbb"         => "\xd2\xba",
1484
      "\xd2\xb9"         => "\xd2\xb8",
1485
      "\xd2\xb7"         => "\xd2\xb6",
1486
      "\xd2\xb5"         => "\xd2\xb4",
1487
      "\xd2\xb3"         => "\xd2\xb2",
1488
      "\xd2\xb1"         => "\xd2\xb0",
1489
      "\xd2\xaf"         => "\xd2\xae",
1490
      "\xd2\xad"         => "\xd2\xac",
1491
      "\xd2\xab"         => "\xd2\xaa",
1492
      "\xd2\xa9"         => "\xd2\xa8",
1493
      "\xd2\xa7"         => "\xd2\xa6",
1494
      "\xd2\xa5"         => "\xd2\xa4",
1495
      "\xd2\xa3"         => "\xd2\xa2",
1496
      "\xd2\xa1"         => "\xd2\xa0",
1497
      "\xd2\x9f"         => "\xd2\x9e",
1498
      "\xd2\x9d"         => "\xd2\x9c",
1499
      "\xd2\x9b"         => "\xd2\x9a",
1500
      "\xd2\x99"         => "\xd2\x98",
1501
      "\xd2\x97"         => "\xd2\x96",
1502
      "\xd2\x95"         => "\xd2\x94",
1503
      "\xd2\x93"         => "\xd2\x92",
1504
      "\xd2\x91"         => "\xd2\x90",
1505
      "\xd2\x8f"         => "\xd2\x8e",
1506
      "\xd2\x8d"         => "\xd2\x8c",
1507
      "\xd2\x8b"         => "\xd2\x8a",
1508
      "\xd2\x81"         => "\xd2\x80",
1509
      "\xd1\xbf"         => "\xd1\xbe",
1510
      "\xd1\xbd"         => "\xd1\xbc",
1511
      "\xd1\xbb"         => "\xd1\xba",
1512
      "\xd1\xb9"         => "\xd1\xb8",
1513
      "\xd1\xb7"         => "\xd1\xb6",
1514
      "\xd1\xb5"         => "\xd1\xb4",
1515
      "\xd1\xb3"         => "\xd1\xb2",
1516
      "\xd1\xb1"         => "\xd1\xb0",
1517
      "\xd1\xaf"         => "\xd1\xae",
1518
      "\xd1\xad"         => "\xd1\xac",
1519
      "\xd1\xab"         => "\xd1\xaa",
1520
      "\xd1\xa9"         => "\xd1\xa8",
1521
      "\xd1\xa7"         => "\xd1\xa6",
1522
      "\xd1\xa5"         => "\xd1\xa4",
1523
      "\xd1\xa3"         => "\xd1\xa2",
1524
      "\xd1\xa1"         => "\xd1\xa0",
1525
      "\xd1\x9f"         => "\xd0\x8f",
1526
      "\xd1\x9e"         => "\xd0\x8e",
1527
      "\xd1\x9d"         => "\xd0\x8d",
1528
      "\xd1\x9c"         => "\xd0\x8c",
1529
      "\xd1\x9b"         => "\xd0\x8b",
1530
      "\xd1\x9a"         => "\xd0\x8a",
1531
      "\xd1\x99"         => "\xd0\x89",
1532
      "\xd1\x98"         => "\xd0\x88",
1533
      "\xd1\x97"         => "\xd0\x87",
1534
      "\xd1\x96"         => "\xd0\x86",
1535
      "\xd1\x95"         => "\xd0\x85",
1536
      "\xd1\x94"         => "\xd0\x84",
1537
      "\xd1\x93"         => "\xd0\x83",
1538
      "\xd1\x92"         => "\xd0\x82",
1539
      "\xd1\x91"         => "\xd0\x81",
1540
      "\xd1\x90"         => "\xd0\x80",
1541
      "\xd1\x8f"         => "\xd0\xaf",
1542
      "\xd1\x8e"         => "\xd0\xae",
1543
      "\xd1\x8d"         => "\xd0\xad",
1544
      "\xd1\x8c"         => "\xd0\xac",
1545
      "\xd1\x8b"         => "\xd0\xab",
1546
      "\xd1\x8a"         => "\xd0\xaa",
1547
      "\xd1\x89"         => "\xd0\xa9",
1548
      "\xd1\x88"         => "\xd0\xa8",
1549
      "\xd1\x87"         => "\xd0\xa7",
1550
      "\xd1\x86"         => "\xd0\xa6",
1551
      "\xd1\x85"         => "\xd0\xa5",
1552
      "\xd1\x84"         => "\xd0\xa4",
1553
      "\xd1\x83"         => "\xd0\xa3",
1554
      "\xd1\x82"         => "\xd0\xa2",
1555
      "\xd1\x81"         => "\xd0\xa1",
1556
      "\xd1\x80"         => "\xd0\xa0",
1557
      "\xd0\xbf"         => "\xd0\x9f",
1558
      "\xd0\xbe"         => "\xd0\x9e",
1559
      "\xd0\xbd"         => "\xd0\x9d",
1560
      "\xd0\xbc"         => "\xd0\x9c",
1561
      "\xd0\xbb"         => "\xd0\x9b",
1562
      "\xd0\xba"         => "\xd0\x9a",
1563
      "\xd0\xb9"         => "\xd0\x99",
1564
      "\xd0\xb8"         => "\xd0\x98",
1565
      "\xd0\xb7"         => "\xd0\x97",
1566
      "\xd0\xb6"         => "\xd0\x96",
1567
      "\xd0\xb5"         => "\xd0\x95",
1568
      "\xd0\xb4"         => "\xd0\x94",
1569
      "\xd0\xb3"         => "\xd0\x93",
1570
      "\xd0\xb2"         => "\xd0\x92",
1571
      "\xd0\xb1"         => "\xd0\x91",
1572
      "\xd0\xb0"         => "\xd0\x90",
1573
      "\xcf\xbb"         => "\xcf\xba",
1574
      "\xcf\xb8"         => "\xcf\xb7",
1575
      "\xcf\xb5"         => "\xce\x95",
1576
      "\xcf\xb2"         => "\xcf\xb9",
1577
      "\xcf\xb1"         => "\xce\xa1",
1578
      "\xcf\xb0"         => "\xce\x9a",
1579
      "\xcf\xaf"         => "\xcf\xae",
1580
      "\xcf\xad"         => "\xcf\xac",
1581
      "\xcf\xab"         => "\xcf\xaa",
1582
      "\xcf\xa9"         => "\xcf\xa8",
1583
      "\xcf\xa7"         => "\xcf\xa6",
1584
      "\xcf\xa5"         => "\xcf\xa4",
1585
      "\xcf\xa3"         => "\xcf\xa2",
1586
      "\xcf\xa1"         => "\xcf\xa0",
1587
      "\xcf\x9f"         => "\xcf\x9e",
1588
      "\xcf\x9d"         => "\xcf\x9c",
1589
      "\xcf\x9b"         => "\xcf\x9a",
1590
      "\xcf\x99"         => "\xcf\x98",
1591
      "\xcf\x97"         => "\xcf\x8f",
1592
      "\xcf\x96"         => "\xce\xa0",
1593
      "\xcf\x95"         => "\xce\xa6",
1594
      "\xcf\x91"         => "\xce\x98",
1595
      "\xcf\x90"         => "\xce\x92",
1596
      "\xcf\x8e"         => "\xce\x8f",
1597
      "\xcf\x8d"         => "\xce\x8e",
1598
      "\xcf\x8c"         => "\xce\x8c",
1599
      "\xcf\x8b"         => "\xce\xab",
1600
      "\xcf\x8a"         => "\xce\xaa",
1601
      "\xcf\x89"         => "\xce\xa9",
1602
      "\xcf\x88"         => "\xce\xa8",
1603
      "\xcf\x87"         => "\xce\xa7",
1604
      "\xcf\x86"         => "\xce\xa6",
1605
      "\xcf\x85"         => "\xce\xa5",
1606
      "\xcf\x84"         => "\xce\xa4",
1607
      "\xcf\x83"         => "\xce\xa3",
1608
      "\xcf\x82"         => "\xce\xa3",
1609
      "\xcf\x81"         => "\xce\xa1",
1610
      "\xcf\x80"         => "\xce\xa0",
1611
      "\xce\xbf"         => "\xce\x9f",
1612
      "\xce\xbe"         => "\xce\x9e",
1613
      "\xce\xbd"         => "\xce\x9d",
1614
      "\xce\xbc"         => "\xce\x9c",
1615
      "\xce\xbb"         => "\xce\x9b",
1616
      "\xce\xba"         => "\xce\x9a",
1617
      "\xce\xb9"         => "\xce\x99",
1618
      "\xce\xb8"         => "\xce\x98",
1619
      "\xce\xb7"         => "\xce\x97",
1620
      "\xce\xb6"         => "\xce\x96",
1621
      "\xce\xb5"         => "\xce\x95",
1622
      "\xce\xb4"         => "\xce\x94",
1623
      "\xce\xb3"         => "\xce\x93",
1624
      "\xce\xb2"         => "\xce\x92",
1625
      "\xce\xb1"         => "\xce\x91",
1626
      "\xce\xaf"         => "\xce\x8a",
1627
      "\xce\xae"         => "\xce\x89",
1628
      "\xce\xad"         => "\xce\x88",
1629
      "\xce\xac"         => "\xce\x86",
1630
      "\xcd\xbd"         => "\xcf\xbf",
1631
      "\xcd\xbc"         => "\xcf\xbe",
1632
      "\xcd\xbb"         => "\xcf\xbd",
1633
      "\xcd\xb7"         => "\xcd\xb6",
1634
      "\xcd\xb3"         => "\xcd\xb2",
1635
      "\xcd\xb1"         => "\xcd\xb0",
1636
      "\xca\x92"         => "\xc6\xb7",
1637
      "\xca\x8c"         => "\xc9\x85",
1638
      "\xca\x8b"         => "\xc6\xb2",
1639
      "\xca\x8a"         => "\xc6\xb1",
1640
      "\xca\x89"         => "\xc9\x84",
1641
      "\xca\x88"         => "\xc6\xae",
1642
      "\xca\x83"         => "\xc6\xa9",
1643
      "\xca\x80"         => "\xc6\xa6",
1644
      "\xc9\xbd"         => "\xe2\xb1\xa4",
1645
      "\xc9\xb5"         => "\xc6\x9f",
1646
      "\xc9\xb2"         => "\xc6\x9d",
1647
      "\xc9\xb1"         => "\xe2\xb1\xae",
1648
      "\xc9\xaf"         => "\xc6\x9c",
1649
      "\xc9\xab"         => "\xe2\xb1\xa2",
1650
      "\xc9\xa9"         => "\xc6\x96",
1651
      "\xc9\xa8"         => "\xc6\x97",
1652
      "\xc9\xa5"         => "\xea\x9e\x8d",
1653
      "\xc9\xa3"         => "\xc6\x94",
1654
      "\xc9\xa0"         => "\xc6\x93",
1655
      "\xc9\x9b"         => "\xc6\x90",
1656
      "\xc9\x99"         => "\xc6\x8f",
1657
      "\xc9\x97"         => "\xc6\x8a",
1658
      "\xc9\x96"         => "\xc6\x89",
1659
      "\xc9\x94"         => "\xc6\x86",
1660
      "\xc9\x93"         => "\xc6\x81",
1661
      "\xc9\x92"         => "\xe2\xb1\xb0",
1662
      "\xc9\x91"         => "\xe2\xb1\xad",
1663
      "\xc9\x90"         => "\xe2\xb1\xaf",
1664
      "\xc9\x8f"         => "\xc9\x8e",
1665
      "\xc9\x8d"         => "\xc9\x8c",
1666
      "\xc9\x8b"         => "\xc9\x8a",
1667
      "\xc9\x89"         => "\xc9\x88",
1668
      "\xc9\x87"         => "\xc9\x86",
1669
      "\xc9\x82"         => "\xc9\x81",
1670
      "\xc9\x80"         => "\xe2\xb1\xbf",
1671
      "\xc8\xbf"         => "\xe2\xb1\xbe",
1672
      "\xc8\xbc"         => "\xc8\xbb",
1673
      "\xc8\xb3"         => "\xc8\xb2",
1674
      "\xc8\xb1"         => "\xc8\xb0",
1675
      "\xc8\xaf"         => "\xc8\xae",
1676
      "\xc8\xad"         => "\xc8\xac",
1677
      "\xc8\xab"         => "\xc8\xaa",
1678
      "\xc8\xa9"         => "\xc8\xa8",
1679
      "\xc8\xa7"         => "\xc8\xa6",
1680
      "\xc8\xa5"         => "\xc8\xa4",
1681
      "\xc8\xa3"         => "\xc8\xa2",
1682
      "\xc8\x9f"         => "\xc8\x9e",
1683
      "\xc8\x9d"         => "\xc8\x9c",
1684
      "\xc8\x9b"         => "\xc8\x9a",
1685
      "\xc8\x99"         => "\xc8\x98",
1686
      "\xc8\x97"         => "\xc8\x96",
1687
      "\xc8\x95"         => "\xc8\x94",
1688
      "\xc8\x93"         => "\xc8\x92",
1689
      "\xc8\x91"         => "\xc8\x90",
1690
      "\xc8\x8f"         => "\xc8\x8e",
1691
      "\xc8\x8d"         => "\xc8\x8c",
1692
      "\xc8\x8b"         => "\xc8\x8a",
1693
      "\xc8\x89"         => "\xc8\x88",
1694
      "\xc8\x87"         => "\xc8\x86",
1695
      "\xc8\x85"         => "\xc8\x84",
1696
      "\xc8\x83"         => "\xc8\x82",
1697
      "\xc8\x81"         => "\xc8\x80",
1698
      "\xc7\xbf"         => "\xc7\xbe",
1699
      "\xc7\xbd"         => "\xc7\xbc",
1700
      "\xc7\xbb"         => "\xc7\xba",
1701
      "\xc7\xb9"         => "\xc7\xb8",
1702
      "\xc7\xb5"         => "\xc7\xb4",
1703
      "\xc7\xb3"         => "\xc7\xb2",
1704
      "\xc7\xaf"         => "\xc7\xae",
1705
      "\xc7\xad"         => "\xc7\xac",
1706
      "\xc7\xab"         => "\xc7\xaa",
1707
      "\xc7\xa9"         => "\xc7\xa8",
1708
      "\xc7\xa7"         => "\xc7\xa6",
1709
      "\xc7\xa5"         => "\xc7\xa4",
1710
      "\xc7\xa3"         => "\xc7\xa2",
1711
      "\xc7\xa1"         => "\xc7\xa0",
1712
      "\xc7\x9f"         => "\xc7\x9e",
1713
      "\xc7\x9d"         => "\xc6\x8e",
1714
      "\xc7\x9c"         => "\xc7\x9b",
1715
      "\xc7\x9a"         => "\xc7\x99",
1716
      "\xc7\x98"         => "\xc7\x97",
1717
      "\xc7\x96"         => "\xc7\x95",
1718
      "\xc7\x94"         => "\xc7\x93",
1719
      "\xc7\x92"         => "\xc7\x91",
1720
      "\xc7\x90"         => "\xc7\x8f",
1721
      "\xc7\x8e"         => "\xc7\x8d",
1722
      "\xc7\x8c"         => "\xc7\x8b",
1723
      "\xc7\x89"         => "\xc7\x88",
1724
      "\xc7\x86"         => "\xc7\x85",
1725
      "\xc6\xbf"         => "\xc7\xb7",
1726
      "\xc6\xbd"         => "\xc6\xbc",
1727
      "\xc6\xb9"         => "\xc6\xb8",
1728
      "\xc6\xb6"         => "\xc6\xb5",
1729
      "\xc6\xb4"         => "\xc6\xb3",
1730
      "\xc6\xb0"         => "\xc6\xaf",
1731
      "\xc6\xad"         => "\xc6\xac",
1732
      "\xc6\xa8"         => "\xc6\xa7",
1733
      "\xc6\xa5"         => "\xc6\xa4",
1734
      "\xc6\xa3"         => "\xc6\xa2",
1735
      "\xc6\xa1"         => "\xc6\xa0",
1736
      "\xc6\x9e"         => "\xc8\xa0",
1737
      "\xc6\x9a"         => "\xc8\xbd",
1738
      "\xc6\x99"         => "\xc6\x98",
1739
      "\xc6\x95"         => "\xc7\xb6",
1740
      "\xc6\x92"         => "\xc6\x91",
1741
      "\xc6\x8c"         => "\xc6\x8b",
1742
      "\xc6\x88"         => "\xc6\x87",
1743
      "\xc6\x85"         => "\xc6\x84",
1744
      "\xc6\x83"         => "\xc6\x82",
1745
      "\xc6\x80"         => "\xc9\x83",
1746
      "\xc5\xbf"         => "\x53",
1747
      "\xc5\xbe"         => "\xc5\xbd",
1748
      "\xc5\xbc"         => "\xc5\xbb",
1749
      "\xc5\xba"         => "\xc5\xb9",
1750
      "\xc5\xb7"         => "\xc5\xb6",
1751
      "\xc5\xb5"         => "\xc5\xb4",
1752
      "\xc5\xb3"         => "\xc5\xb2",
1753
      "\xc5\xb1"         => "\xc5\xb0",
1754
      "\xc5\xaf"         => "\xc5\xae",
1755
      "\xc5\xad"         => "\xc5\xac",
1756
      "\xc5\xab"         => "\xc5\xaa",
1757
      "\xc5\xa9"         => "\xc5\xa8",
1758
      "\xc5\xa7"         => "\xc5\xa6",
1759
      "\xc5\xa5"         => "\xc5\xa4",
1760
      "\xc5\xa3"         => "\xc5\xa2",
1761
      "\xc5\xa1"         => "\xc5\xa0",
1762
      "\xc5\x9f"         => "\xc5\x9e",
1763
      "\xc5\x9d"         => "\xc5\x9c",
1764
      "\xc5\x9b"         => "\xc5\x9a",
1765
      "\xc5\x99"         => "\xc5\x98",
1766
      "\xc5\x97"         => "\xc5\x96",
1767
      "\xc5\x95"         => "\xc5\x94",
1768
      "\xc5\x93"         => "\xc5\x92",
1769
      "\xc5\x91"         => "\xc5\x90",
1770
      "\xc5\x8f"         => "\xc5\x8e",
1771
      "\xc5\x8d"         => "\xc5\x8c",
1772
      "\xc5\x8b"         => "\xc5\x8a",
1773
      "\xc5\x88"         => "\xc5\x87",
1774
      "\xc5\x86"         => "\xc5\x85",
1775
      "\xc5\x84"         => "\xc5\x83",
1776
      "\xc5\x82"         => "\xc5\x81",
1777
      "\xc5\x80"         => "\xc4\xbf",
1778
      "\xc4\xbe"         => "\xc4\xbd",
1779
      "\xc4\xbc"         => "\xc4\xbb",
1780
      "\xc4\xba"         => "\xc4\xb9",
1781
      "\xc4\xb7"         => "\xc4\xb6",
1782
      "\xc4\xb5"         => "\xc4\xb4",
1783
      "\xc4\xb3"         => "\xc4\xb2",
1784
      "\xc4\xb1"         => "\x49",
1785
      "\xc4\xaf"         => "\xc4\xae",
1786
      "\xc4\xad"         => "\xc4\xac",
1787
      "\xc4\xab"         => "\xc4\xaa",
1788
      "\xc4\xa9"         => "\xc4\xa8",
1789
      "\xc4\xa7"         => "\xc4\xa6",
1790
      "\xc4\xa5"         => "\xc4\xa4",
1791
      "\xc4\xa3"         => "\xc4\xa2",
1792
      "\xc4\xa1"         => "\xc4\xa0",
1793
      "\xc4\x9f"         => "\xc4\x9e",
1794
      "\xc4\x9d"         => "\xc4\x9c",
1795
      "\xc4\x9b"         => "\xc4\x9a",
1796
      "\xc4\x99"         => "\xc4\x98",
1797
      "\xc4\x97"         => "\xc4\x96",
1798
      "\xc4\x95"         => "\xc4\x94",
1799
      "\xc4\x93"         => "\xc4\x92",
1800
      "\xc4\x91"         => "\xc4\x90",
1801
      "\xc4\x8f"         => "\xc4\x8e",
1802
      "\xc4\x8d"         => "\xc4\x8c",
1803
      "\xc4\x8b"         => "\xc4\x8a",
1804
      "\xc4\x89"         => "\xc4\x88",
1805
      "\xc4\x87"         => "\xc4\x86",
1806
      "\xc4\x85"         => "\xc4\x84",
1807
      "\xc4\x83"         => "\xc4\x82",
1808
      "\xc4\x81"         => "\xc4\x80",
1809
      "\xc3\xbf"         => "\xc5\xb8",
1810
      "\xc3\xbe"         => "\xc3\x9e",
1811
      "\xc3\xbd"         => "\xc3\x9d",
1812
      "\xc3\xbc"         => "\xc3\x9c",
1813
      "\xc3\xbb"         => "\xc3\x9b",
1814
      "\xc3\xba"         => "\xc3\x9a",
1815
      "\xc3\xb9"         => "\xc3\x99",
1816
      "\xc3\xb8"         => "\xc3\x98",
1817
      "\xc3\xb6"         => "\xc3\x96",
1818
      "\xc3\xb5"         => "\xc3\x95",
1819
      "\xc3\xb4"         => "\xc3\x94",
1820
      "\xc3\xb3"         => "\xc3\x93",
1821
      "\xc3\xb2"         => "\xc3\x92",
1822
      "\xc3\xb1"         => "\xc3\x91",
1823
      "\xc3\xb0"         => "\xc3\x90",
1824
      "\xc3\xaf"         => "\xc3\x8f",
1825
      "\xc3\xae"         => "\xc3\x8e",
1826
      "\xc3\xad"         => "\xc3\x8d",
1827
      "\xc3\xac"         => "\xc3\x8c",
1828
      "\xc3\xab"         => "\xc3\x8b",
1829
      "\xc3\xaa"         => "\xc3\x8a",
1830
      "\xc3\xa9"         => "\xc3\x89",
1831
      "\xc3\xa8"         => "\xc3\x88",
1832
      "\xc3\xa7"         => "\xc3\x87",
1833
      "\xc3\xa6"         => "\xc3\x86",
1834
      "\xc3\xa5"         => "\xc3\x85",
1835
      "\xc3\xa4"         => "\xc3\x84",
1836
      "\xc3\xa3"         => "\xc3\x83",
1837
      "\xc3\xa2"         => "\xc3\x82",
1838
      "\xc3\xa1"         => "\xc3\x81",
1839
      "\xc3\xa0"         => "\xc3\x80",
1840
      "\xc2\xb5"         => "\xce\x9c",
1841
      "\x7a"             => "\x5a",
1842
      "\x79"             => "\x59",
1843
      "\x78"             => "\x58",
1844
      "\x77"             => "\x57",
1845
      "\x76"             => "\x56",
1846
      "\x75"             => "\x55",
1847
      "\x74"             => "\x54",
1848
      "\x73"             => "\x53",
1849
      "\x72"             => "\x52",
1850
      "\x71"             => "\x51",
1851
      "\x70"             => "\x50",
1852
      "\x6f"             => "\x4f",
1853
      "\x6e"             => "\x4e",
1854
      "\x6d"             => "\x4d",
1855
      "\x6c"             => "\x4c",
1856
      "\x6b"             => "\x4b",
1857
      "\x6a"             => "\x4a",
1858
      "\x69"             => "\x49",
1859
      "\x68"             => "\x48",
1860
      "\x67"             => "\x47",
1861 157
      "\x66"             => "\x46",
1862
      "\x65"             => "\x45",
1863 157
      "\x64"             => "\x44",
1864
      "\x63"             => "\x43",
1865 1
      "\x62"             => "\x42",
1866 1
      "\x61"             => "\x41",
1867 1
1868 1
    );
1869 1
1870 157
    return $case;
1871
  }
1872
1873
  /**
   * Probes the server environment for UTF-8 related capabilities.
   *
   * The results are stored in self::$support. The probes only run while the
   * 'mbstring' entry is not set yet, so repeated calls return immediately.
   *
   * INFO: There is no need to call this manually, it is triggered whenever it is needed.
   */
  public static function checkForSupport()
  {
    // the 'mbstring' entry doubles as the "already probed" marker
    if (isset(self::$support['mbstring'])) {
      return;
    }

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
1889
1890 8
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * When the "IntlChar" class is available (see self::checkForSupport()) the
   * value is handed to \IntlChar::chr() as-is. Otherwise the value is turned
   * into an integer (non-integer input goes through self::hex_to_int()) and
   * decoded from its numeric HTML entity.
   *
   * @param    int $code_point The code point for which to generate a character.
   *
   * @return   string Multi-Byte character, returns empty string on failure to encode.
   */
  public static function chr($code_point)
  {
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $return = \IntlChar::chr($code_point);

      // check for failure explicitly: the character "0" (U+0030) is a valid
      // result, but a plain truthiness test would turn it into ''
      return (is_string($return) && $return !== '') ? $return : '';
    }

    $i = (int)$code_point;
    if ($i !== $code_point) {
      // not a plain integer, try to read it as a hexadecimal notation
      $i = (int)self::hex_to_int($code_point);
    }

    if (!$i) {
      return '';
    }

    $return = self::html_entity_decode("&#{$i};", ENT_QUOTES);

    // same explicit check as above, so that code point 48 yields "0" and not ''
    return (is_string($return) && $return !== '') ? $return : '';
  }
1927 2
1928
  /**
   * Runs a callback on every character of a string.
   *
   * The string is broken into characters by self::split() and the callback
   * is invoked once per character via array_map().
   *
   * @param  string|array $callback The callback function.
   * @param  string       $str      UTF-8 string to run callback on.
   *
   * @return array The callback results, one entry per character.
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1942
1943 2
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param    string $str The original Unicode string.
   *
   * @return   array An array of byte lengths of each character,
   *           an empty array for empty input.
   */
  public static function chr_size_list($str)
  {
    // the string "0" is falsy in PHP but still one character long,
    // so it must not be treated like empty input
    if (!$str && $str !== '0') {
      return array();
    }

    return array_map('strlen', self::split($str));
  }
1963
1964 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes the sequence has (1 to 4); the
   * payload bits of the lead byte and of each following byte are then
   * shifted together into one integer.
   *
   * @param   string $char The input character
   *
   * @return  int The decoded value, 0 for an empty string. Missing continuation
   *              bytes of a truncated sequence are counted as zero bits.
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // nothing to decode, and reading offset 0 would be an undefined string offset
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx (a byte that is missing from a truncated sequence adds no bits)
      $byte = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
2003
2004
  /**
   * Get the hexadecimal code point (e.g. U+xxxx) of a UTF-8 encoded character.
   *
   * The character is converted with self::ord() and the result is formatted
   * by self::int_to_hex() together with the given prefix.
   *
   * @param    string $char The input character
   * @param    string $pfix The prefix handed to self::int_to_hex()
   *
   * @return   string The formatted code point
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
2016
2017
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from self::split() and are glued together with implode(),
   * so $end is only placed between two chunks and not after the last one.
   *
   * @param    string $body     The original string to be split.
   * @param    int    $chunklen The maximum character length of a chunk.
   * @param    string $end      The character(s) to be inserted between the chunks.
   *
   * @return   string The chunked string
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
2030 35
2031 7
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, always in this order: drop invalid bytes, remove the "diamond question mark",
   * remove invisible characters, then the optional normalizations selected by the flags.
   *
   * @param string $str                     The string to be sanitized.
   * @param bool   $remove_bom              set true, to run self::removeBOM() on the result
   * @param bool   $normalize_whitespace    set true, to run self::normalize_whitespace()
   * @param bool   $normalize_msword        e.g.: "…" => "..."
   * @param bool   $keep_non_breaking_space set true, to keep non-breaking-spaces
   *                                        (only used together with $normalize_whitespace)
   *
   * @return string Clean UTF-8 encoded string
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 collects runs of well-formed sequences, every other matched byte is dropped
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($pattern, '$1', $str);

    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
2077
2078
  /**
   * Cleans up a string: fixes simple UTF-8 encoding errors and keeps only clean UTF-8 content.
   *
   * @param string $str
   *
   * @return string The cleaned string, '' for empty input.
   */
  public static function cleanup($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // self::clean() with these flags will:
    // - remove all none UTF-8 symbols
    // - remove diamond question mark (�)
    // - remove invisible characters (e.g. "\0")
    // - remove BOM
    // - normalize whitespace chars (but keep non-breaking-spaces)
    // the msword normalization stays switched off
    return (string)self::clean($fixed, true, true, false, true);
  }
2105
2106 3
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * A string is first broken into characters by self::split(); an array is used
   * as given. Every element is then mapped through "ord" and, when requested,
   * through "int_to_hex" of this class.
   *
   * @param    string|string[] $arg     A UTF-8 encoded string or an array of such strings.
   * @param    bool            $u_style If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.
   *
   * @return   array The array of code points
   */
  public static function codepoints($arg, $u_style = false)
  {
    $class = '\\voku\\helper\\UTF8';

    $chars = is_string($arg) ? self::split($arg) : $arg;
    $points = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    return array_map(array($class, 'int_to_hex'), $points);
  }
2141 1
2142
  /**
   * Counts how often each character occurs in a string.
   *
   * The string is split into single characters by self::split() and the
   * occurrences are tallied with array_count_values().
   *
   * @param    string $str       The input string.
   * @param    bool   $cleanUtf8 Clean non UTF-8 chars from the string
   *                             (forwarded to self::split()).
   *
   * @return   array An associative array of Character as keys and
   *           their count as values.
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
2155 11
2156
  /**
   * Get a UTF-8 character from its decimal code representation.
   *
   * The code is written as a hexadecimal HTML entity ("&#x...;") which is then
   * converted from "HTML-ENTITIES" to UTF-8.
   *
   * @param   int $code Code.
   *
   * @return  string
   */
  public static function decimal_to_chr($code)
  {
    self::checkForSupport();

    $entity = '&#x' . dechex($code) . ';';

    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
2173
2174
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The input is returned unchanged when the string or the encoding is empty, when no
   * source encoding could be detected, when $force is off and the detected encoding
   * already equals the requested one, or when the conversion yields an empty result.
   *
   * @param string $encoding e.g. 'UTF-8', 'ISO-8859-1', etc.
   * @param string $str      the string
   * @param bool   $force    force the new encoding (we try to fix broken / double encoding for UTF-8)<br />
   *                         otherwise we auto-detect the current string-encoding
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);
    $encodingDetected = self::str_detect_encoding($str);

    // without a detected source encoding there is nothing to convert from
    if (!$encodingDetected) {
      return $str;
    }

    $forced = ($force === true);

    // not forced and the string already has the requested encoding
    if (!$forced && $encodingDetected === $encoding) {
      return $str;
    }

    self::checkForSupport();

    if ($encoding === 'UTF-8') {
      $fromKnownSource = $encodingDetected === 'UTF-8'
                         || $encodingDetected === 'WINDOWS-1252'
                         || $encodingDetected === 'ISO-8859-1';

      if ($forced || $fromKnownSource) {
        return self::to_utf8($str);
      }
    }

    if ($encoding === 'ISO-8859-1') {
      $fromKnownSource = $encodingDetected === 'ISO-8859-1'
                         || $encodingDetected === 'UTF-8';

      if ($forced || $fromKnownSource) {
        return self::to_win1252($str);
      }
    }

    // every other combination is left to mbstring
    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // fall back to the input when the conversion produced nothing usable
    return $strEncoded ? $strEncoded : $str;
  }
2248
2249
  /**
   * Callback function for preg_replace_callback use.
   *
   * Decodes the complete match from "HTML-ENTITIES" to UTF-8. A match that decodes
   * to a single quote is returned as the entity '&#x27;' instead of the quote itself.
   *
   * @internal used for "UTF8::html_entity_decode()"
   *
   * @param  array $matches PREG matches
   *
   * @return string
   */
  protected static function html_entity_decode_callback($matches)
  {
    self::checkForSupport();

    $decoded = \mb_convert_encoding($matches[0], 'UTF-8', 'HTML-ENTITIES');

    return $decoded === "'" ? '&#x27;' : $decoded;
  }
2270 1
2271 1
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      Name of the file to read.
   * @param int|bool|null $flags         [optional] Forwarded as the second argument of the native
   *                                     file_get_contents() (the "use include path" switch, e.g.
   *                                     FILE_USE_INCLUDE_PATH); null means "off".
   * @param resource|null $context       [optional] A valid context resource created with
   *                                     stream_context_create(). When it is null and $timeout is
   *                                     not 0, a context with that "http" timeout is created.
   * @param int|null      $offset        [optional] The offset where the reading starts;
   *                                     null means the start of the file.
   * @param int|null      $maxlen        [optional] Maximum length of data read. The default is to
   *                                     read until end of file is reached.
   * @param int           $timeout       Timeout in seconds, only used for the auto-created
   *                                     "http" context.
   * @param boolean       $convertToUtf8 WARNING: maybe you can't use this option for images or pdf,
   *                                     because they used non default utf-8 chars
   *
   * @return string|false The function returns the read data or false on failure.
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, so a file name
    // containing such characters is changed before it is opened; the filter is also
    // deprecated as of PHP 8.1 - kept as-is because callers may rely on it, please confirm
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // the native function declares these two parameters as bool and int; cast the
    // null defaults explicitly instead of passing null through (deprecated since PHP 8.1)
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      // leave the length argument out completely, so the file is read until its end
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      self::checkForSupport();

      // $force = false: only convert when the detected encoding differs from UTF-8
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    // clean utf-8 string
    return $data;
  }
2391
2392
  /**
   * Checks whether the content of a file starts with a BOM (Byte Order Mark).
   *
   * @param    string $file_path Path to a readable file.
   *
   * @return   bool True if the file content starts with a BOM, false otherwise
   *                (also false when the file could not be read).
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // file_get_contents() reports an unreadable or missing file with false;
    // do not pass that non-string value on to the string check.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
2403
2404
  /**
   * Recursively normalizes strings (by default to NFC) and unifies their line endings.
   *
   * Arrays and objects are walked element by element / property by property;
   * values that are neither array, object nor string are returned untouched.
   *
   * @param mixed  $var                The value to filter.
   * @param int    $normalization_form Normalizer form constant (default 4, i.e. NFC).
   * @param string $leading_combining  Prefix put in front of a string that starts with a
   *                                   combining mark (\p{Mn}); pass '' to disable.
   *
   * @return mixed The filtered value, same container type as the input.
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    // Workaround https://bugs.php.net/65732
    if (false !== strpos($var, "\r")) {
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure 7-bit strings need no normalization.
    if (!preg_match('/[\x80-\xFF]/', $var)) {
      return $var;
    }

    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: the string is already in the requested form.
      $normalized = '-';
    } else {
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // An unusable normalization result means the input has to be
      // re-encoded through self::encode() instead.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars for NFC-safe concatenations.
    if ($var[0] >= "\x80" && isset($normalized[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
2456
2457 1
  /**
   * Wrapper around the native filter_input() whose result is passed through UTF8::filter().
   *
   * @param int    $type   Input source constant, forwarded to filter_input().
   * @param string $var    Name of the variable to fetch.
   * @param int    $filter Filter id, forwarded to filter_input().
   * @param mixed  $option Options / flags; only forwarded when explicitly passed.
   *
   * @return mixed The native result after UTF8::filter().
   */
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only if the caller really supplied a fourth argument.
    $raw = (func_num_args() >= 4)
        ? filter_input($type, $var, $filter, $option)
        : filter_input($type, $var, $filter);

    return self::filter($raw);
  }
2477 2
2478
  /**
   * Wrapper around the native filter_input_array() whose result is passed through UTF8::filter().
   *
   * @param int   $type       Input source constant, forwarded to filter_input_array().
   * @param mixed $definition Filter definition; only forwarded when explicitly passed.
   * @param bool  $add_empty  Forwarded together with $definition.
   *
   * @return mixed The native result after UTF8::filter().
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults apply; otherwise forward everything.
    $raw = (func_num_args() >= 2)
        ? filter_input_array($type, $definition, $add_empty)
        : filter_input_array($type);

    return self::filter($raw);
  }
2497 1
2498
  /**
   * Wrapper around the native filter_var() whose result is passed through UTF8::filter().
   *
   * @param mixed $var    The value to filter.
   * @param int   $filter Filter id, forwarded to filter_var().
   * @param mixed $option Options / flags; only forwarded when explicitly passed.
   *
   * @return mixed The native result after UTF8::filter().
   */
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only if the caller really supplied a third argument.
    $raw = (func_num_args() >= 3)
        ? filter_var($var, $filter, $option)
        : filter_var($var, $filter);

    return self::filter($raw);
  }
2517
2518
  /**
   * Wrapper around the native filter_var_array() whose result is passed through UTF8::filter().
   *
   * @param array $data       The values to filter.
   * @param mixed $definition Filter definition; only forwarded when explicitly passed.
   * @param bool  $add_empty  Forwarded together with $definition.
   *
   * @return mixed The native result after UTF8::filter().
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults apply; otherwise forward everything.
    $raw = (func_num_args() >= 2)
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    return self::filter($raw);
  }
2537 1
2538 1
  /**
   * Check that a string is not longer than a given number of characters.
   *
   * The length is measured with UTF8::strlen().
   *
   * @param    string $str      The string to measure.
   * @param    int    $box_size The maximum allowed length.
   *
   * @return   bool true if the length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return ($box_size >= $length);
  }
2550
2551
  /**
   * Repair simple cases of broken UTF-8 by replacing known byte sequences.
   *
   * The replacement pairs come from self::$brokenUtf8ToUtf8.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * @param string $str The string to repair (cast to string first).
   *
   * @return string The repaired string; '' for empty input.
   */
  public static function fix_simple_utf8($str)
  {
    // Search / replace lists are derived once and kept for later calls.
    static $needles = null;
    static $substitutes = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($needles === null) {
      $needles = array_keys(self::$brokenUtf8ToUtf8);
      $substitutes = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($needles, $substitutes, $str);
  }
2578
2579
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * The value is decoded and re-encoded (utf8_decode() followed by to_utf8())
   * until a pass no longer changes it. Arrays are processed element by element,
   * keeping their keys.
   *
   * @param string|string[] $str A string or an array of strings.
   *
   * @return mixed The fixed string, or an array of fixed strings.
   */
  public static function fix_utf8($str)
  {
    if (is_array($str)) {
      // array_map() with a single array preserves the keys.
      return array_map(array(__CLASS__, 'fix_utf8'), $str);
    }

    // Repeat until one round leaves the value unchanged.
    for ($previous = ''; $previous !== $str;) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($str));
    }

    return $str;
  }
2607
2608
  /**
   * Get the text direction of a character.
   *
   * When IntlChar support is flagged in self::$support, the direction class
   * reported by IntlChar::charDirection() is used first; if that yields no
   * decision, or IntlChar is unavailable, the code point is looked up in the
   * built-in tables below.
   *
   * @param   string $char Character.
   *
   * @return  string 'RTL' or 'LTR'
   */
  public static function getCharDirection($char)
  {
    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction class values from the "IntlChar"-Class
      if (in_array($direction, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($direction, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $code = static::chr_to_decimal($char);

    // Everything outside the span covered by the tables is left-to-right.
    if (!(0x5be <= $code && 0x10b7f >= $code)) {
      return 'LTR';
    }

    if (0x85e >= $code) {

      // lower block: 0x5be .. 0x85e
      $singles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $code) {

      return 'RTL';

    } elseif (0xfb1d <= $code) {

      // upper block: 0xfb1d .. 0x10b7f
      $singles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // gap between the two blocks (apart from 0x200f)
      return 'LTR';

    }

    // Single code points are matched strictly, ranges inclusively.
    if (in_array($code, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $code && $range[1] >= $code) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2726
2727
  /**
   * Load a data set from "/data/*.php" next to this class file.
   *
   * @param string $file Base name of the data file (without directory and ".php").
   *
   * @return bool|string|array|int Whatever the included file returns, or false if the file does not exist.
   */
  protected static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2744
2745
  /**
   * Creates a random string of characters.
   *
   * WARNING: This method does not create a hash of something, maybe it will be renamed in future.
   * The characters are picked with mt_rand(), so the result must not be used
   * where unpredictability matters.
   *
   * @param    int $len The length of string in characters (cast to int; values <= 0 give '').
   *
   * @return   string String consisting of random characters.
   *
   * @deprecated
   */
  public static function hash($len = 8)
  {
    // The character pool is built once and reused by later calls.
    static $chars = array();
    static $chars_len = null;

    // Normalise to an integer first: a fractional or non-numeric length could
    // otherwise step past zero in a countdown and never terminate.
    $len = (int)$len;

    if ($len <= 0) {
      return '';
    }

    // init
    self::checkForSupport();

    if (empty($chars)) {
      if (self::$support['pcre_utf8'] === true) {
        $chars = array_map(
            array(
                '\\voku\\helper\\UTF8',
                'chr',
            ),
            range(48, 79)
        );

        // Blank out everything that is neither a number nor a letter ...
        $chars = preg_replace('/[^\p{N}\p{Lu}\p{Ll}]/u', '', $chars);

        // ... and drop exactly those blanked entries. A bare array_filter()
        // would also discard the digit "0" because it is falsy.
        $chars = array_values(
            array_filter(
                $chars,
                function ($candidate) {
                  return (string)$candidate !== '';
                }
            )
        );
      } else {
        $chars = array_merge(range('0', '9'), range('A', 'Z'), range('a', 'z'));
      }

      $chars_len = count($chars);
    }

    $hash = '';

    for ($i = 0; $i < $len; ++$i) {
      $hash .= $chars[mt_rand() % $chars_len];
    }

    return $hash;
  }
2796
2797
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with
   * "\u" or "U+" (case-insensitive).
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param    string $str The hexadecimal code point representation.
   *
   * @return   int The code point, or 0 on failure.
   */
  public static function hex_to_int($str)
  {
    // Only real hex digits are allowed: with a wider class such as [a-z0-9],
    // input like "U+12gh" would match and be partially parsed by intval().
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return 0;
  }
2814
2815
  /**
   * Alias for "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @param string $str      The input string.
   * @param int    $flags    Flags forwarded to html_entity_decode().
   * @param string $encoding Encoding forwarded to html_entity_decode().
   *
   * @return string The decoded string.
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2828
2829
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * Uses mb_encode_numericentity() when available; otherwise every character
   * is encoded individually via UTF8::single_chr_html_encode().
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param  string $str            The Unicode string to be encoded as numbered entities.
   * @param  bool   $keepAsciiChars Keep ASCII chars (only code points from 0x80 upwards are encoded).
   * @param  string $encoding       Encoding of $str; only used on the mbstring path.
   *
   * @return string HTML numbered entities.
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      $encoding = self::normalizeEncoding($encoding);

      // convmap = (first code point, last code point, offset, mask).
      // The range must reach U+10FFFF: stopping at 0xffff leaves
      // supplementary-plane characters (e.g. emoji) unencoded.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode character by character.
    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
2868
2869
  /**
2870
   * UTF-8 version of html_entity_decode()
2871
   *
2872
   * The reason we are not using html_entity_decode() by itself is because
2873
   * while it is not technically correct to leave out the semicolon
2874
   * at the end of an entity most browsers will still interpret the entity
2875
   * correctly. html_entity_decode() does not convert entities without
2876
   * semicolons, so we are left with our own little solution here. Bummer.
2877
   *
2878
   * Convert all HTML entities to their applicable characters
2879
   *
2880
   * INFO: opposite to UTF8::html_encode()
2881
   *
2882
   * @link http://php.net/manual/en/function.html-entity-decode.php
2883
   *
2884
   * @param string $str      <p>
2885
   *                         The input string.
2886
   *                         </p>
2887
   * @param int    $flags    [optional] <p>
2888
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2889
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2890
   *                         <table>
2891
   *                         Available <i>flags</i> constants
2892
   *                         <tr valign="top">
2893
   *                         <td>Constant Name</td>
2894
   *                         <td>Description</td>
2895
   *                         </tr>
2896
   *                         <tr valign="top">
2897
   *                         <td><b>ENT_COMPAT</b></td>
2898
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2899
   *                         </tr>
2900
   *                         <tr valign="top">
2901
   *                         <td><b>ENT_QUOTES</b></td>
2902
   *                         <td>Will convert both double and single quotes.</td>
2903
   *                         </tr>
2904
   *                         <tr valign="top">
2905
   *                         <td><b>ENT_NOQUOTES</b></td>
2906
   *                         <td>Will leave both double and single quotes unconverted.</td>
2907
   *                         </tr>
2908
   *                         <tr valign="top">
2909
   *                         <td><b>ENT_HTML401</b></td>
2910
   *                         <td>
2911
   *                         Handle code as HTML 4.01.
2912
   *                         </td>
2913
   *                         </tr>
2914
   *                         <tr valign="top">
2915
   *                         <td><b>ENT_XML1</b></td>
2916
   *                         <td>
2917
   *                         Handle code as XML 1.
2918
   *                         </td>
2919
   *                         </tr>
2920
   *                         <tr valign="top">
2921
   *                         <td><b>ENT_XHTML</b></td>
2922
   *                         <td>
2923
   *                         Handle code as XHTML.
2924
   *                         </td>
2925
   *                         </tr>
2926
   *                         <tr valign="top">
2927
   *                         <td><b>ENT_HTML5</b></td>
2928
   *                         <td>
2929
   *                         Handle code as HTML 5.
2930
   *                         </td>
2931
   *                         </tr>
2932
   *                         </table>
2933
   *                         </p>
2934
   * @param string $encoding [optional] <p>
2935
   *                         Encoding to use.
2936
   *                         </p>
2937
   *
2938
   * @return string the decoded string.
2939
   */
2940
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    // Nothing to do for empty input ...
    if ($str === '') {
      return '';
    }

    // ... or for input without any ampersand.
    if (strpos($str, '&') === false) {
      return $str;
    }

    $encoding = self::normalizeEncoding($encoding);

    // Default flags: ENT_HTML5 is only added when Bootup reports PHP >= 5.4.
    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Decode repeatedly until a full pass leaves the string unchanged.
    while (true) {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", array('\voku\helper\UTF8', 'html_entity_decode_callback'), $str);

      // Append the missing semicolon to numeric entities that lack one,
      // so the native html_entity_decode() recognises them.
      $terminated = preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str);

      // decode numeric & UTF16 two byte entities
      $str = html_entity_decode($terminated, $flags, $encoding);

      if ($before === $str) {
        break;
      }
    }

    return $str;
  }
2978
2979
  /**
2980
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2981
   *
2982
   * @link http://php.net/manual/en/function.htmlentities.php
2983
   *
2984
   * @param string $str           <p>
2985
   *                              The input string.
2986
   *                              </p>
2987
   * @param int    $flags         [optional] <p>
2988
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2989
   *                              invalid code unit sequences and the used document type. The default is
2990
   *                              ENT_COMPAT | ENT_HTML401.
2991
   *                              <table>
2992
   *                              Available <i>flags</i> constants
2993
   *                              <tr valign="top">
2994
   *                              <td>Constant Name</td>
2995
   *                              <td>Description</td>
2996
   *                              </tr>
2997
   *                              <tr valign="top">
2998
   *                              <td><b>ENT_COMPAT</b></td>
2999
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3000
   *                              </tr>
3001
   *                              <tr valign="top">
3002
   *                              <td><b>ENT_QUOTES</b></td>
3003
   *                              <td>Will convert both double and single quotes.</td>
3004
   *                              </tr>
3005
   *                              <tr valign="top">
3006
   *                              <td><b>ENT_NOQUOTES</b></td>
3007
   *                              <td>Will leave both double and single quotes unconverted.</td>
3008
   *                              </tr>
3009
   *                              <tr valign="top">
3010
   *                              <td><b>ENT_IGNORE</b></td>
3011
   *                              <td>
3012
   *                              Silently discard invalid code unit sequences instead of returning
3013
   *                              an empty string. Using this flag is discouraged as it
3014
   *                              may have security implications.
3015
   *                              </td>
3016
   *                              </tr>
3017
   *                              <tr valign="top">
3018
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3019
   *                              <td>
3020
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3021
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
3022
   *                              </td>
3023
   *                              </tr>
3024
   *                              <tr valign="top">
3025
   *                              <td><b>ENT_DISALLOWED</b></td>
3026
   *                              <td>
3027
   *                              Replace invalid code points for the given document type with a
3028
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3029
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3030
   *                              instance, to ensure the well-formedness of XML documents with
3031
   *                              embedded external content.
3032
   *                              </td>
3033
   *                              </tr>
3034
   *                              <tr valign="top">
3035
   *                              <td><b>ENT_HTML401</b></td>
3036
   *                              <td>
3037
   *                              Handle code as HTML 4.01.
3038
   *                              </td>
3039
   *                              </tr>
3040
   *                              <tr valign="top">
3041
   *                              <td><b>ENT_XML1</b></td>
3042
   *                              <td>
3043
   *                              Handle code as XML 1.
3044
   *                              </td>
3045
   *                              </tr>
3046
   *                              <tr valign="top">
3047
   *                              <td><b>ENT_XHTML</b></td>
3048
   *                              <td>
3049
   *                              Handle code as XHTML.
3050
   *                              </td>
3051
   *                              </tr>
3052
   *                              <tr valign="top">
3053
   *                              <td><b>ENT_HTML5</b></td>
3054
   *                              <td>
3055
   *                              Handle code as HTML 5.
3056
   *                              </td>
3057
   *                              </tr>
3058
   *                              </table>
3059
   *                              </p>
3060
   * @param string $encoding      [optional] <p>
3061
   *                              Like <b>htmlspecialchars</b>,
3062 1
   *                              <b>htmlentities</b> takes an optional third argument
3063
   *                              <i>encoding</i> which defines encoding used in
3064 1
   *                              conversion.
3065
   *                              Although this argument is technically optional, you are highly
3066
   *                              encouraged to specify the correct value for your code.
3067
   *                              </p>
3068
   * @param bool   $double_encode [optional] <p>
3069
   *                              When <i>double_encode</i> is turned off PHP will not
3070
   *                              encode existing html entities. The default is to convert everything.
3071
   *                              </p>
3072 1
   *
3073
   *
3074 1
   * @return string the encoded string.
3075
   * </p>
3076
   * <p>
3077
   * If the input <i>string</i> contains an invalid code unit
3078
   * sequence within the given <i>encoding</i> an empty string
3079
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3080
   * <b>ENT_SUBSTITUTE</b> flags are set.
3081
   */
3082
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    $encoding = self::normalizeEncoding($encoding);

    // First pass: the native conversion.
    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // The extra pass below only applies to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Second pass: collect every distinct character of 3+ bytes together with
    // its numeric-entity form, then replace them all in one go.
    $needles = array();
    $entities = array();

    foreach (self::chr_size_list($str) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($str, $position);

      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return str_replace($needles, $entities, $str);
  }
3108
3109
  /**
3110
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3111
   *
3112
   * INFO: Take a look at "UTF8::htmlentities()"
3113
   *
3114
   * @link http://php.net/manual/en/function.htmlspecialchars.php
3115 1
   *
3116
   * @param string $str           <p>
3117 1
   *                              The string being converted.
3118
   *                              </p>
3119
   * @param int    $flags         [optional] <p>
3120
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
3121
   *                              invalid code unit sequences and the used document type. The default is
3122
   *                              ENT_COMPAT | ENT_HTML401.
3123
   *                              <table>
3124
   *                              Available <i>flags</i> constants
3125
   *                              <tr valign="top">
3126
   *                              <td>Constant Name</td>
3127 1
   *                              <td>Description</td>
3128
   *                              </tr>
3129 1
   *                              <tr valign="top">
3130
   *                              <td><b>ENT_COMPAT</b></td>
3131
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3132
   *                              </tr>
3133
   *                              <tr valign="top">
3134
   *                              <td><b>ENT_QUOTES</b></td>
3135
   *                              <td>Will convert both double and single quotes.</td>
3136
   *                              </tr>
3137
   *                              <tr valign="top">
3138
   *                              <td><b>ENT_NOQUOTES</b></td>
3139
   *                              <td>Will leave both double and single quotes unconverted.</td>
3140
   *                              </tr>
3141
   *                              <tr valign="top">
3142
   *                              <td><b>ENT_IGNORE</b></td>
3143
   *                              <td>
3144
   *                              Silently discard invalid code unit sequences instead of returning
3145
   *                              an empty string. Using this flag is discouraged as it
3146
   *                              may have security implications.
3147
   *                              </td>
3148
   *                              </tr>
3149
   *                              <tr valign="top">
3150
   *                              <td><b>ENT_SUBSTITUTE</b></td>
3151
   *                              <td>
3152
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
3153
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
3154
   *                              </td>
3155
   *                              </tr>
3156
   *                              <tr valign="top">
3157
   *                              <td><b>ENT_DISALLOWED</b></td>
3158
   *                              <td>
3159
   *                              Replace invalid code points for the given document type with a
3160
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3161
   *                              (otherwise) instead of leaving them as is. This may be useful, for
3162
   *                              instance, to ensure the well-formedness of XML documents with
3163
   *                              embedded external content.
3164
   *                              </td>
3165
   *                              </tr>
3166
   *                              <tr valign="top">
3167
   *                              <td><b>ENT_HTML401</b></td>
3168
   *                              <td>
3169
   *                              Handle code as HTML 4.01.
3170
   *                              </td>
3171
   *                              </tr>
3172
   *                              <tr valign="top">
3173
   *                              <td><b>ENT_XML1</b></td>
3174
   *                              <td>
3175
   *                              Handle code as XML 1.
3176
   *                              </td>
3177
   *                              </tr>
3178
   *                              <tr valign="top">
3179 16
   *                              <td><b>ENT_XHTML</b></td>
3180
   *                              <td>
3181 16
   *                              Handle code as XHTML.
3182
   *                              </td>
3183
   *                              </tr>
3184
   *                              <tr valign="top">
3185
   *                              <td><b>ENT_HTML5</b></td>
3186
   *                              <td>
3187
   *                              Handle code as HTML 5.
3188
   *                              </td>
3189
   *                              </tr>
3190
   *                              </table>
3191
   *                              </p>
3192 4
   * @param string $encoding      [optional] <p>
3193
   *                              Defines encoding used in conversion.
3194 4
   *                              </p>
3195
   *                              <p>
3196
   *                              For the purposes of this function, the encodings
3197
   *                              ISO-8859-1, ISO-8859-15,
3198
   *                              UTF-8, cp866,
3199
   *                              cp1251, cp1252, and
3200
   *                              KOI8-R are effectively equivalent, provided the
3201
   *                              <i>string</i> itself is valid for the encoding, as
3202
   *                              the characters affected by <b>htmlspecialchars</b> occupy
3203
   *                              the same positions in all of these encodings.
3204 1
   *                              </p>
3205
   * @param bool   $double_encode [optional] <p>
3206 1
   *                              When <i>double_encode</i> is turned off PHP will not
3207
   *                              encode existing html entities, the default is to convert everything.
3208 1
   *                              </p>
3209 1
   *
3210
   * @return string The converted string.
3211
   * </p>
3212 1
   * <p>
3213 1
   * If the input <i>string</i> contains an invalid code unit
3214
   * sequence within the given <i>encoding</i> an empty string
3215 1
   * will be returned, unless either the <b>ENT_IGNORE</b> or
3216
   * <b>ENT_SUBSTITUTE</b> flags are set.
3217
   */
3218
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Delegate to PHP's htmlspecialchars() using the normalized encoding name.
    return htmlspecialchars($str, $flags, self::normalizeEncoding($encoding), $double_encode);
  }
3224
3225
  /**
3226 4
   * checks whether iconv is available on the server
3227
   *
3228
   * @return   bool True if available, False otherwise
3229 4
   */
3230
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean, so it is returned as-is.
    return extension_loaded('iconv');
  }
3234 4
3235 4
  /**
3236 4
   * Converts Integer to hexadecimal U+xxxx code point representation.
3237 4
   *
3238 3
   * INFO: opposite to UTF8::hex_to_int()
3239
   *
3240 4
   * @param    int    $int The integer to be converted to hexadecimal code point.
3241
   * @param    string $pfix
3242
   *
3243
   * @return   string The code point, or empty string on failure.
3244
   */
3245
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only values whose string form consists purely of decimal digits are
    // accepted; everything else (negative numbers, floats, text) yields ''.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad the hexadecimal form with zeros to a minimum of four digits.
    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
3257
3258
  /**
3259
   * checks whether intl is available on the server
3260
   *
3261
   * @return   bool True if available, False otherwise
3262
   */
3263
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean, so it is returned as-is.
    return extension_loaded('intl');
  }
3267
3268
  /**
3269
   * checks whether intl-char is available on the server
3270
   *
3271
   * @return   bool True if available, False otherwise
3272
   */
3273 2
  public static function intlChar_loaded()
  {
    // Both conditions must hold: the Bootup::is_php('7.0') check and the
    // existence of the IntlChar class. "&&" is used instead of "and" to avoid
    // the low-precedence pitfalls of the word operators; the result is the same.
    return Bootup::is_php('7.0') === true && class_exists('IntlChar');
  }
3277
3278
  /**
3279
   * alias for "UTF8::is_ascii()"
3280
   *
3281
   * @param string $str
3282
   *
3283
   * @return boolean
3284
   */
3285 2
  public static function isAscii($str)
  {
    // camelCase alias: forwards the argument unchanged to is_ascii().
    return self::is_ascii($str);
  }
3289
3290 2
  /**
3291 2
   * alias for "UTF8::is_base64()"
3292 2
   *
3293 2
   * @param string $str
3294 2
   *
3295 2
   * @return bool
3296 2
   */
3297 2
  public static function isBase64($str)
  {
    // camelCase alias: forwards the argument unchanged to is_base64().
    return self::is_base64($str);
  }
3301 2
3302 2
  /**
3303 2
   * alias for "UTF8::is_binary()"
3304
   *
3305 2
   * @param string $str
3306 2
   *
3307 2
   * @return bool
3308 2
   */
3309 2
  public static function isBinary($str)
  {
    // camelCase alias: forwards the argument unchanged to is_binary().
    return self::is_binary($str);
  }
3313 2
3314 1
  /**
3315 1
   * alias for "UTF8::is_bom()"
3316 2
   *
3317 2
   * @param string $utf8_chr
3318 2
   *
3319
   * @return boolean
3320 2
   */
3321 1
  public static function isBom($utf8_chr)
  {
    // camelCase alias: forwards the argument unchanged to is_bom().
    return self::is_bom($utf8_chr);
  }
3325
3326
  /**
3327
   * alias for "UTF8::is_json()"
3328 2
   *
3329
   * @param string $str
3330 2
   *
3331
   * @return bool
3332
   */
3333
  public static function isJson($str)
  {
    // camelCase alias: forwards the argument unchanged to is_json().
    return self::is_json($str);
  }
3337
3338
  /**
3339
   * alias for "UTF8::is_html()"
3340 2
   *
3341
   * @param string $str
3342 2
   *
3343 2
   * @return boolean
3344
   */
3345 2
  public static function isHtml($str)
  {
    // camelCase alias: forwards the argument unchanged to is_html().
    return self::is_html($str);
  }
3349 2
3350 2
  /**
3351 2
   * alias for "UTF8::is_utf8()"
3352 2
   *
3353 2
   * @param string $str
3354
   *
3355
   * @return bool
3356 2
   */
3357 2
  public static function isUtf8($str)
  {
    // camelCase alias: forwards the argument unchanged to is_utf8().
    return self::is_utf8($str);
  }
3361 2
3362 2
  /**
3363 1
   * alias for "UTF8::is_utf16()"
3364 1
   *
3365 1
   * @param string $str
3366 1
   *
3367 1
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3368 1
   */
3369
  public static function isUtf16($str)
  {
    // camelCase alias: forwards the argument unchanged to is_utf16().
    return self::is_utf16($str);
  }
3373 1
3374
  /**
3375 2
   * alias for "UTF8::is_utf32()"
3376
   *
3377
   * @param string $str
3378
   *
3379
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
3380
   */
3381
  public static function isUtf32($str)
  {
    // camelCase alias: forwards the argument unchanged to is_utf32().
    return self::is_utf32($str);
  }
3385 2
3386
  /**
3387
   * Checks if a string is 7 bit ASCII.
3388
   *
3389
   * @param    string $str The string to check.
3390
   *
3391
   * @return   bool <strong>true</strong> if it is ASCII<br />
3392
   *                <strong>false</strong> otherwise
3393
   */
3394
  public static function is_ascii($str)
  {
    // A single byte in the range 0x80-0xFF is enough to make the string
    // non-ASCII; the result is the negation of that search.
    $foundHighByte = preg_match('/[\x80-\xFF]/', $str);

    return !$foundHighByte;
  }
3398
3399 34
  /**
3400
   * Returns true if the string is base64 encoded, false otherwise.
3401 34
   *
3402 3
   * @param string $str
3403
   *
3404
   * @return bool Whether or not $str is base64 encoded
3405 32
   */
3406
  public static function is_base64($str)
  {
    $str = (string)$str;

    // The empty string is never reported as base64.
    if ($str === '') {
      return false;
    }

    // A strict decode followed by a re-encode must reproduce the input
    // byte for byte; anything else is not (canonical) base64.
    return base64_encode(base64_decode($str, true)) === $str;
  }
3420
3421
  /**
3422 32
   * Check if the input is binary (this heuristic looks like a hack).
3423 32
   *
3424 32
   * @param mixed $input
3425
   *
3426
   * @return bool
3427 32
   */
3428
  public static function is_binary($input)
  {
    // Heuristic 1: the input consists solely of the characters "0" and "1"
    // (an optional trailing newline is tolerated by "$").
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Heuristic 2: the input contains at least one NUL byte.
    //
    // A former third condition compared
    // substr_count($input, '^ -~') / strlen($input) against 0.3. It counted
    // occurrences of the literal 4-byte string '^ -~', so the ratio could never
    // exceed 0.25, and its empty-input fallback was the constant "1 === 0".
    // The condition was therefore always false and has been removed; results
    // are unchanged.
    return substr_count($input, "\x00") > 0;
  }
3445 6
3446 6
  /**
3447 6
   * Check if the file is binary.
3448 12
   *
3449
   * @param string $file
3450
   *
3451
   * @return boolean
3452
   */
3453
  public static function is_binary_file($file)
  {
    // Only the first 512 bytes of the file are inspected.
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // fopen() reports failure by returning false (it does not throw), so the
      // handle must be checked before it is read from or closed.
      if ($fp !== false) {
        $data = fread($fp, 512);
        fclose($fp);

        // fread() may also return false; keep the empty block in that case.
        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // Kept for setups that convert warnings into exceptions: an unreadable
      // file is treated like an empty one.
      $block = '';
    }

    return self::is_binary($block);
  }
3465 3
3466 3
  /**
3467 3
   * Checks if the given string is equal to any "Byte Order Mark".
3468
   *
3469
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3470
   *
3471 3
   * @param    string $str The input string.
3472
   *
3473 32
   * @return   bool True if $str is a Byte Order Mark, False otherwise.
3474
   */
3475
  public static function is_bom($str)
  {
    // The known BOM byte strings are the keys of self::$bom; the input must be
    // strictly identical to one of them.
    return in_array($str, array_keys(self::$bom), true);
  }
3485
3486 28
  /**
3487
   * Try to check if "$str" is a JSON string.
3488
   *
3489
   * @param string $str
3490
   *
3491
   * @return bool
3492 28
   */
3493 28
  public static function is_json($str)
  {
    $str = (string)$str;

    // The empty string is never reported as JSON.
    if ($str === '') {
      return false;
    }

    // Only input that decodes to an object, without a decoder error, counts.
    $decoded = self::json_decode($str);

    return is_object($decoded) && json_last_error() === JSON_ERROR_NONE;
  }
3511
3512
  /**
3513 13
   * Check if the string contains any html-tags <lall>.
3514
   *
3515
   * @param string $str
3516 32
   *
3517
   * @return boolean
3518 14
   */
3519
  public static function is_html($str)
  {
    $str = (string)$str;

    // The empty string cannot contain a tag.
    if ($str === '') {
      return false;
    }

    // Look for one opening, closing or self-closing tag, optionally carrying
    // attributes; a single hit is sufficient.
    $found = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str, $found);

    return count($found) > 0;
  }
3538
3539
  /**
3540
   * Check if the string is UTF-16.
3541
   *
3542
   * @param string $str
3543
   *
3544
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
3545
   */
3546 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is probed at all.
    if (self::is_binary($str)) {
      self::checkForSupport();

      // Score both byte orders with the same round-trip probe.
      $maybeUTF16LE = self::count_utf_round_trip_hits($str, 'UTF-16LE');
      $maybeUTF16BE = self::count_utf_round_trip_hits($str, 'UTF-16BE');

      // A tie (including 0:0) is inconclusive and falls through to false.
      if ($maybeUTF16BE !== $maybeUTF16LE) {
        return ($maybeUTF16LE > $maybeUTF16BE) ? 1 : 2;
      }
    }

    return false;
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $str
   *
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is probed at all.
    if (self::is_binary($str)) {
      self::checkForSupport();

      // Score both byte orders with the same round-trip probe.
      $maybeUTF32LE = self::count_utf_round_trip_hits($str, 'UTF-32LE');
      $maybeUTF32BE = self::count_utf_round_trip_hits($str, 'UTF-32BE');

      // A tie (including 0:0) is inconclusive and falls through to false.
      if ($maybeUTF32BE !== $maybeUTF32LE) {
        return ($maybeUTF32LE > $maybeUTF32BE) ? 1 : 2;
      }
    }

    return false;
  }

  /**
   * Shared probe of is_utf16() / is_utf32(): converts $str from $encoding to
   * UTF-8, round-trips the result once and, if the round trip is stable,
   * counts how many entries of the converted text's count_chars() result pass
   * the in_array() check against the count_chars() result of the raw input.
   *
   * @param string $str      The raw input (BOM already removed by the caller).
   * @param string $encoding The candidate encoding, e.g. "UTF-16LE".
   *
   * @return int The number of hits; 0 if the conversion is empty or unstable.
   */
  private static function count_utf_round_trip_hits($str, $encoding)
  {
    $hits = 0;

    $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if ($test) {
      $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
      $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);

      if ($test3 === $test) {
        $strChars = self::count_chars($str, true);

        // NOTE(review): in_array() searches the *values* of $strChars, while
        // $test3char is a *key* of the other count_chars() result - confirm
        // against count_chars() that this comparison is what is intended.
        foreach (self::count_chars($test3, true) as $test3char => $test3charEmpty) {
          if (in_array($test3char, $strChars, true) === true) {
            $hits++;
          }
        }
      }
    }

    return $hits;
  }
3652 23
3653
  /**
3654
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3655
   *
3656
   * @see    http://hsivonen.iki.fi/php-utf8/
3657
   *
3658
   * @param    string $str The string to be checked.
3659
   *
3660
   * @return   bool
3661
   */
3662 1
  public static function is_utf8($str)
  {
    $str = (string)$str;

    // The empty string is considered valid.
    if (!isset($str[0])) {
      return true;
    }

    // NOTE(review): the regex shortcut below is taken when pcre_utf8_support()
    // is NOT true, and the byte-wise scan when it is - going by the method name
    // this looks inverted; confirm against pcre_utf8_support() before changing.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str, $ar) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current character
    $mUcs4 = 0; // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the lead octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          // First octet of 5 octet sequence. This is always illegal (either
          // not the shortest form or outside 0-0x10FFFF), but instead of
          // resynchronizing we read the whole sequence and let the checks
          // at its end reject it.
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, handled like the 5 octet case.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal lead octet.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only a continuation octet (10xxxxxx)
      // is legal; anything else means the sequence is incomplete.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        // End of the sequence: $mUcs4 holds the complete code point.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Reset for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A string that ends in the middle of a multi-octet sequence is truncated
    // and therefore invalid; previously this case was reported as valid.
    return $mState === 0;
  }
3786 8
3787
  /**
3788 8
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3789 8
   * Decodes a JSON string
3790
   *
3791 8
   * @link http://php.net/manual/en/function.json-decode.php
3792
   *
3793 8
   * @param string $json    <p>
3794
   *                        The <i>json</i> string being decoded.
3795 2
   *                        </p>
3796
   *                        <p>
3797 2
   *                        This function only works with UTF-8 encoded strings.
3798
   *                        </p>
3799 1
   *                        <p>PHP implements a superset of
3800 1
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3801
   *                        only supports these values when they are nested inside an array or an object.
3802 2
   *                        </p>
3803 2
   * @param bool   $assoc   [optional] <p>
3804
   *                        When <b>TRUE</b>, returned objects will be converted into
3805 8
   *                        associative arrays.
3806 8
   *                        </p>
3807 1
   * @param int    $depth   [optional] <p>
3808 1
   *                        User specified recursion depth.
3809
   *                        </p>
3810 8
   * @param int    $options [optional] <p>
3811 8
   *                        Bitmask of JSON decode options. Currently only
3812
   *                        <b>JSON_BIGINT_AS_STRING</b>
3813 8
   *                        is supported (default is to cast large integers as floats)
3814
   *                        </p>
3815
   *
3816
   * @return mixed the value encoded in <i>json</i> in appropriate
3817
   * PHP type. Values true, false and
3818
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3819
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3820
   * <i>json</i> cannot be decoded or if the encoded
3821
   * data is deeper than the recursion limit.
3822
   */
3823
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through filter() before it is decoded.
    $filtered = self::filter($json);

    // "$options" is only forwarded when Bootup::is_php('5.4') holds.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3835
3836
  /**
   * Returns the JSON representation of a value (wrapper around the native json_encode()).
   *
   * The value is passed through self::filter() first; the result of that call is what gets encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The value being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of the native JSON_* encoding constants
   *                       (e.g. <b>JSON_HEX_TAG</b>, <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_UNICODE</b>).
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Maximum nesting depth, must be greater than zero.
   *                       Only forwarded when Bootup::is_php('5.5') reports true.
   *                       </p>
   *
   * @return string|false A JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // Without PHP 5.5 support the $depth argument is not handed to the native function.
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3884
3885
  /**
   * Makes string's first char lowercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string, or an empty string for empty input
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // Nothing to lowercase: avoid handing an empty/false substr() result to strtolower().
    if (!isset($str[0])) {
      return '';
    }

    // self::substr() may return false; cast so both operands are always strings.
    $first = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtolower($first) . $rest;
  }
3896
3897 1
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * WARNING: This is much slower than "ltrim()" !!!!
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped (defaults to whitespace)
   *
   * @return   string The string with unwanted characters stripped from the left
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // INF is the "no characters given" marker: fall back to the whitespace class.
    if (INF === $chars) {
      $pattern = '\s';
    } else {
      $pattern = self::rxClass($chars);
    }

    return preg_replace('/^' . $pattern . '+/u', '', $str);
  }
3919
3920
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the highest code point,
   *                  or an empty string if the input contains no characters.
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // The native max() warns (PHP < 8) or throws (PHP >= 8) on an empty list.
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3935
3936
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param    string $str The original Unicode string.
   *
   * @return   int The largest per-character byte length, or 0 if there are no characters.
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3953
3954
  /**
   * Checks whether the mbstring extension is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return   bool True if available, False otherwise
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3969
3970
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param    mixed $arg A UTF-8 encoded string or an array of such strings.
   *
   * @return   string The character with the lowest code point,
   *                  or an empty string if the input contains no characters.
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // The native min() warns (PHP < 8) or throws (PHP >= 8) on an empty list.
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3985 7
3986
  /**
   * Normalize the encoding-name input.
   *
   * 'UTF-8' and names listed in self::$iconvEncoding are returned untouched; everything else is
   * upper-cased and mapped through a small alias table. Results are memoized per input name.
   *
   * @param string $encoding e.g.: ISO, UTF8, WINDOWS-1251 etc.
   *
   * @return string|false e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.; false for an empty input.
   */
  public static function normalizeEncoding($encoding)
  {
    static $normalizedCache = array();

    if (!$encoding) {
      return false;
    }

    // Fast paths: already-canonical names need no lookup.
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($normalizedCache[$encoding])) {
      return $normalizedCache[$encoding];
    }

    $upper = strtoupper($encoding);

    // Lookup key: everything except letters, digits and whitespace is removed.
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // Unknown names fall back to the upper-cased input.
    $normalized = !empty($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upper;

    $normalizedCache[$encoding] = $normalized;

    return $normalized;
  }
4042
4043
  /**
   * Normalize MS Word special characters.
   *
   * Every key of self::$utf8MSWord found in the string is replaced by its mapped value.
   *
   * @param string $str The string to be normalized.
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // Built once per request from self::$utf8MSWord and then reused.
    static $map = null;

    if (null === $map) {
      $map = array(
          'search'  => array_keys(self::$utf8MSWord),
          'replace' => array_values(self::$utf8MSWord),
      );
    }

    return str_replace($map['search'], $map['replace'], $str);
  }
4062 36
4063
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$whitespaceTable is replaced by a plain space; unless told otherwise,
   * the entries of self::$bidiUniCodeControlsTable are removed first.
   *
   * @param string $str                     The string to be normalized.
   * @param bool   $keepNonBreakingSpace    Set to true, to keep non-breaking-spaces.
   * @param bool   $keepBidiUnicodeControls Set to true, to keep non-printable (for the web) bidirectional text chars.
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $whitespaceCache = array();
    static $bidiChars = null;

    // One cached search list per $keepNonBreakingSpace variant.
    $variant = (int)$keepNonBreakingSpace;

    if (!isset($whitespaceCache[$variant])) {
      $table = self::$whitespaceTable;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $whitespaceCache[$variant] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      if (null === $bidiChars) {
        $bidiChars = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($bidiChars, '', $str);
    }

    return str_replace($whitespaceCache[$variant], ' ', $str);
  }
4101
4102
  /**
   * Format a number with grouped thousands.
   *
   * Unlike the native function on old PHP versions, separators longer than one byte
   * (e.g. a UTF-8 non-breaking space) are supported.
   *
   * @param float  $number        The number being formatted.
   * @param int    $decimals      Number of decimal digits.
   * @param string $dec_point     Separator for the decimal point (may be multi-byte).
   * @param string $thousands_sep Thousands separator (may be multi-byte).
   *
   * @return string
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // If EITHER separator is longer than one byte, format with ASCII placeholders first and
    // swap them afterwards. strtr() replaces both placeholders in a single pass, so a
    // separator that itself contains "." or "," is never replaced a second time.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      return strtr(
          \number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return \number_format($number, $decimals, $dec_point, $thousands_sep);
  }
4137 16
4138
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * When self::$support['intlChar'] is true, \IntlChar::ord() is tried first; a falsy result
   * from it falls through to the manual decoding of the first (up to four) bytes.
   *
   * @param    string $s The character of which to calculate code point.
   *
   * @return   int Unicode code point of the given character,<br />
   *           0 for empty input.
   */
  public static function ord($s)
  {
    // '0' is falsy in PHP but is a real character, so it must not be treated as empty.
    if (!$s && $s !== '0') {
      return 0;
    }

    // init
    self::checkForSupport();

    if (self::$support['intlChar'] === true) {
      $codePoint = \IntlChar::ord($s);
      if ($codePoint) {
        return $codePoint;
      }
    }

    // 1-based list of the first (max. four) byte values
    $bytes = unpack('C*', substr($s, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    // four-byte sequence
    if ($lead >= 0xF0 && isset($bytes[4])) {
      return (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // three-byte sequence
    if ($lead >= 0xE0 && isset($bytes[3])) {
      return (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // two-byte sequence
    if ($lead >= 0xC0 && isset($bytes[2])) {
      return (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte (or a lead byte without enough following bytes)
    return $lead;
  }
4179
4180
  /**
   * Parses the string into variables.
   *
   * WARNING: This differs from the one-argument form of parse_str(): nothing is placed
   *    in the local scope, the parsed variables are always written into $result.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str     <p>
   *                        The input string (passed through self::filter() before parsing).
   *                        </p>
   * @param array  $result  <p>
   *                        Receives the parsed variables as array elements (by reference).
   *                        </p>
   *
   * @return void
   */
  public static function parse_str($str, &$result)
  {
    // init
    self::checkForSupport();

    \mb_parse_str(self::filter($str), $result);
  }
4207
4208 24
  /**
   * Checks if the "u" modifier, which enables Unicode support in PCRE, is available.
   *
   * @return   bool True if support is available, false otherwise
   */
  public static function pcre_utf8_support()
  {
    // An unsupported modifier makes preg_match() emit a warning and return false, hence the "@".
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
4218 24
4219 24
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * @param    mixed $var1 Numeric or hexadecimal code points, or a UTF-8 character to start from.
   * @param    mixed $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
   *
   * @return   array The characters from start to end; an empty array if a boundary is
   *                 falsy or resolves to code point 0.
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $start = self::rangeBoundaryToCodePoint($var1);
    if (!$start) {
      return array();
    }

    $end = self::rangeBoundaryToCodePoint($var2);
    if (!$end) {
      return array();
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Resolve one boundary of range() to a code point.
   *
   * Decimal digits are taken literally, other hexadecimal digit strings go through
   * self::hex_to_int(), anything else is treated as a character and handed to self::ord().
   *
   * @param    mixed $boundary
   *
   * @return   int
   */
  protected static function rangeBoundaryToCodePoint($boundary)
  {
    // ctype_*() must be given strings: integers are interpreted as ASCII values.
    $asString = (string)$boundary;

    if (ctype_digit($asString)) {
      return (int)$boundary;
    }

    if (ctype_xdigit($asString)) {
      return (int)self::hex_to_int($boundary);
    }

    return self::ord($boundary);
  }
4265
4266 24
  /**
   * Snake-case alias for "UTF8::removeBOM()".
   *
   * @param string $str The input string.
   *
   * @return string Whatever removeBOM() returns for the given string.
   */
  public static function remove_bom($str)
  {
    $withoutBom = self::removeBOM($str);

    return $withoutBom;
  }
4277 3
4278
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$bom (BOM string => byte length) found at the very start
   * of the string is cut off.
   *
   * @param string $str
   *
   * @return string
   */
  public static function removeBOM($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // Before PHP 7, substr() returns false when the string consists of the BOM only;
        // the cast keeps the documented string return type.
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
4295
4296
  /**
   * Removes duplicate (directly repeated) occurrences of a string in another string.
   *
   * @param    string       $str  The base string
   * @param    string|array $what String (or list of strings) to search for in the base string
   *
   * @return   string The result string with removed duplicates
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      foreach ($what as $item) {
        // Replace each run by its captured single occurrence. Using "$1" instead of the raw
        // $item keeps "$0", "$n" or "\n" inside $item from being read as back-references.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4318
4319
  /**
   * Remove Invisible Characters
   *
   * This prevents sandwiching null characters
   * between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param  string $str         The input string.
   * @param  bool   $url_encoded Also remove the URL-encoded forms ("%00" ... "%1f", "%7f").
   * @param  string $replacement Text that takes the place of every removed sequence.
   *
   * @return  string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // Removed: every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09).
    if ($url_encoded) {
      // Percent-encoding hex digits are case-insensitive ("%0B" === "%0b"), hence the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing changes: removing one sequence can bring the pieces
    // of another one together (e.g. "%%0b0b").
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4353 13
4354
  /**
   * Replace the diamond question mark (�), i.e. the Unicode replacement character.
   *
   * @param string $str     The input string.
   * @param string $unknown Text that takes the place of every replacement character.
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // the same character, once as escaped bytes and once as a literal
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );

    // one replacement entry per search entry
    $replace = array_fill(0, count($search), $unknown);

    return str_replace($search, $replace, $str);
  }
4377
  /**
4378
   * Strip whitespace or other characters from end of a UTF-8 string.
4379
   *
4380
   * WARNING: This is much slower then "rtrim()" !!!!
4381 1
   *
4382
   * @param    string $str   The string to be trimmed
4383 1
   * @param    string $chars Optional characters to be stripped
4384
   *
4385
   * @return   string The string with unwanted characters stripped from the right
4386
   */
4387 1 View Code Duplication
  public static function rtrim($str = '', $chars = INF)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
4388
  {
4389 1
    $str = (string)$str;
4390
4391
    if (!isset($str[0])) {
4392
      return '';
4393 1
    }
4394 1
4395
    $chars = INF === $chars ? '\s' : self::rxClass($chars);
4396
4397 1
    return preg_replace("/{$chars}+$/u", '', $str);
4398 1
  }
4399 1
4400 1
  /**
   * Build a regex fragment that matches any one of the characters of $s.
   *
   * The result is either a plain character class "[...]" or, when self::str_split() yields
   * elements that do not fit into a class, a non-capturing alternation "(?:[...]|...)".
   * Results are memoized per ($s . $class).
   *
   * @param string $s     The characters to match.
   * @param string $class Text the character class starts with.
   *
   * @return string
   */
  protected static function rxClass($s, $class = '')
  {
    static $patternCache = array();

    $cacheKey = $s . $class;

    if (isset($patternCache[$cacheKey])) {
      return $patternCache[$cacheKey];
    }

    // index 0 collects the character class body, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $chr) {
      // a dash goes to the very front of the class
      if ('-' === $chr) {
        $parts[0] = '-' . $parts[0];
        continue;
      }

      // at most two bytes: append to the class, escaped for the "/" delimiter
      if (!isset($chr[2])) {
        $parts[0] .= preg_quote($chr, '/');
        continue;
      }

      // a single (longer) character: append as it is
      if (1 === self::strlen($chr)) {
        $parts[0] .= $chr;
        continue;
      }

      // anything else becomes its own alternative
      $parts[] = $chr;
    }

    $parts[0] = '[' . $parts[0] . ']';

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $patternCache[$cacheKey] = $pattern;

    return $pattern;
  }
4445 2
4446 2
  /**
   * Echo the values of self::$support (native UTF8-Support libs), e.g. for debugging.
   *
   * Each value is printed followed by a newline and a "<br>" tag.
   */
  public static function showSupport()
  {
    $output = '';

    foreach (self::$support as $supportValue) {
      $output .= $supportValue . "\n<br>";
    }

    echo $output;
  }
4455
4456
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param    string $char           The Unicode character to be encoded as numbered entity.
   * @param    bool   $keepAsciiChars Keep ASCII chars.
   *
   * @return   string The HTML numbered entity, the unchanged character when ASCII chars
   *                  are kept, or an empty string for empty input.
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false)
  {
    // '0' is falsy in PHP but is a real character (same guard as in ord()).
    if (!$char && $char !== '0') {
      return '';
    }

    if (
        $keepAsciiChars === true
        &&
        self::isAscii($char) === true
    ) {
      return $char;
    }

    return '&#' . self::ord($char) . ';';
  }
4480
4481
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split by a regex; otherwise a byte-wise scanner is
   * used, which silently drops bytes that do not form a well-formed 1-4 byte sequence.
   *
   * @param    string  $str       The string to split into array.
   * @param    int     $length    Number of characters per array element.
   * @param    boolean $cleanUtf8 Clean non UTF-8 chars from the string (PCRE code path only).
   *
   * @return   array An array containing chunks of the string.
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // init
    self::checkForSupport();
    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes and collect well-formed sequences

      $byteCount = strlen($str);
      $i = 0;

      while ($i < $byteCount) {
        $lead = $str[$i];

        // sequence width announced by the lead byte (0 = not a valid lead byte)
        if (($lead & "\x80") === "\x00") {
          $width = 1;
        } elseif (($lead & "\xE0") === "\xC0") {
          $width = 2;
        } elseif (($lead & "\xF0") === "\xE0") {
          $width = 3;
        } elseif (($lead & "\xF8") === "\xF0") {
          $width = 4;
        } else {
          $width = 0;
        }

        // all announced bytes must exist ...
        $valid = $width > 0 && ($i + $width) <= $byteCount;

        // ... and every following byte must be a continuation byte (10xxxxxx)
        $char = $lead;
        for ($k = 1; $valid && $k < $width; $k++) {
          $next = $str[$i + $k];
          $valid = ($next & "\xC0") === "\x80";
          $char .= $next;
        }

        if ($valid) {
          $chars[] = $char;
          $i += $width;
        } else {
          // skip just the offending lead byte
          $i++;
        }
      }
    }

    if ($length > 1) {
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4560
4561
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally an "iconv()" round-trip per entry of self::$iconvEncoding.
   *
   * @param string $str
   *
   * @return false|string The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   */
  public static function str_detect_encoding($str)
  {

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each detector runs only once; its result is checked for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'windows-1251',
        'ISO-8859-1',
        'ASCII',
        'UTF-8',
    );

    self::checkForSupport();

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()": an encoding matches if converting to itself changes nothing
    //

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $converted = @iconv($encodingTmp, $encodingTmp, $str);

      // a failed conversion (false) must never count as a match
      if ($converted !== false && md5($converted) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4638
4639
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value (or array of values) being searched for. Every replacement
   *                       with a search array is performed on the result of the previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value (or array of values); always inserted literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed a string or an array of replacements.
   * @since 5.0
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every needle into a case-insensitive, UTF-8 aware literal pattern.
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // an empty needle must never match anything
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() reads "$n" and "\n" in the replacement as back-references; escape "$"
    // and "\" so the replacement is inserted literally, as str_ireplace() would do.
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    $result = preg_replace($patterns, $replacement, $subject, -1, $replacedCount);
    $count = $replacedCount;

    return $result;
  }
4683 4
4684 3
  /**
   * Limit the number of characters in a string, cutting at a word boundary (space) where possible.
   *
   * If the string is longer than $length, it is shortened to at most $length characters, the
   * trailing partial word is dropped and $strAddOn is appended.
   *
   * @param  string $str      The input string.
   * @param  int    $length   Maximum number of characters to keep before the add-on.
   * @param  string $strAddOn Text appended to a shortened string.
   *
   * @return string The input itself when it is short enough, otherwise the shortened string plus $strAddOn.
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $length = (int)$length;
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls right behind a word: cut the space and stop there
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $str = self::substr($str, 0, $length);

    // everything in front of the last space, i.e. without the (possibly cut) last word
    $new_str = implode(' ', array_slice(explode(' ', $str), 0, -1));

    if ($new_str !== '') {
      return $new_str . $strAddOn;
    }

    // no space at all: fall back to a hard cut
    return self::substr($str, 0, $length - 1) . $strAddOn;
  }
4724
4725 4
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when $pad_length is not an integer, is not
   * positive, is smaller than the character length of $str, or when $pad_string is empty.
   *
   * @param    string $str        The input string
   * @param    int    $pad_length The length of return string (in characters)
   * @param    string $pad_string String to use for padding the input string
   * @param    int    $pad_type   can be STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   *
   * @return   string Returns the padded string
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (!is_int($pad_length) || $pad_length <= 0 || $pad_length < $str_length) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with (and it would be a division by zero below)
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the bigger one
        $repeat = (int)ceil($diff / $ps_length / 2);
        $pre = self::substr(str_repeat($pad_string, $repeat), 0, (int)floor($diff / 2));
        $post = self::substr(str_repeat($pad_string, $repeat), 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4770
4771
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is handed to the native str_repeat().
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           It has to be greater than or equal to 0; with 0 an
   *                           empty string is returned.
   *                           </p>
   *
   * @return string the repeated string.
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4795
4796
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: this is only a wrapper for the native "str_replace()", which is byte based
   * and therefore already safe for UTF-8 input.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack. If subject is an array,
   *                       the replacement is performed on every entry and an array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed This function returns a string or an array with the replaced values.
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4830
4831
  /**
   * Shuffles all the characters in the string.
   *
   * The string is cut into pieces by self::split(), the pieces are shuffled
   * with the native shuffle() and glued back together.
   *
   * @param    string $str The input string
   *
   * @return   string The shuffled string.
   */
  public static function str_shuffle($str)
  {
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
4846
4847
  /**
   * Sort all characters according to code points.
   *
   * @param    string $str    A UTF-8 string.
   * @param    bool   $unique Sort unique. If true, repeated characters are ignored.
   * @param    bool   $desc   If true, will sort characters in reverse code point order.
   *
   * @return   string String of sorted characters
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4872 1
4873
  /**
   * Convert a string to an array of chunks, each holding $len grapheme clusters.
   *
   * With "intl" support the clusters are read via grapheme_extract(), otherwise
   * via the grapheme-cluster regex of the Symfony polyfill.
   *
   * @param string $str The input string.
   * @param int    $len Number of grapheme clusters per chunk; values below 1 are
   *                    handed to the native str_split(), which reports the error.
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    // init
    self::checkForSupport();
    $len = (int)$len;

    if ($len < 1) {
      return str_split($str, $len);
    }

    if (self::$support['intl'] === true) {
      $graphemes = array();
      $position = 0;
      $byteLength = strlen($str);
      while ($position < $byteLength) {
        // $position is both the start offset and the by-reference "next offset" result
        $graphemes[] = \grapheme_extract($str, 1, GRAPHEME_EXTR_COUNT, $position, $position);
      }
    } else {
      preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
      $graphemes = $matches[0];
    }

    if ($len === 1) {
      return $graphemes;
    }

    // glue every $len consecutive clusters together
    return array_map('implode', array_chunk($graphemes, $len));
  }
4921
4922
  /**
   * Convert a binary representation ("0" / "1" digits) back into a string.
   *
   * The digits are left-padded with zeros to full bytes and converted byte by
   * byte, so inputs of any length are converted exactly and the output of
   * str_to_binary() (which has no leading zeros) is restored correctly.
   * Characters other than "0" and "1" are ignored; an input without any
   * binary digit yields an empty string.
   *
   * @param string $bin A string of binary digits, e.g. "1100001".
   *
   * @return string
   */
  public static function binary_to_str($bin)
  {
    $bits = (string)preg_replace('/[^01]/', '', (string)$bin);

    if ($bits === '') {
      return '';
    }

    // align to whole bytes
    $missing = (8 - strlen($bits) % 8) % 8;
    $bits = str_repeat('0', $missing) . $bits;

    $result = '';
    foreach (str_split($bits, 8) as $byte) {
      $result .= chr(bindec($byte));
    }

    return $result;
  }
4933
4934
  /**
   * Get a binary representation of a specific string.
   *
   * Every byte is written as eight binary digits; leading zeros of the whole
   * result are stripped (so "a" gives "1100001"), and an empty or all-zero
   * input gives "0". The conversion works byte by byte and is therefore exact
   * for inputs of any length.
   *
   * @param   string $str The input string.
   *
   * @return  string
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4949
4950
  /**
   * US-ASCII transliterations of Unicode text.
   *
   * Ported Sean M. Burke's Text::Unidecode Perl module (He did all the hard work!)
   * Warning: you should only pass this well formed UTF-8!
   *
   * The input is cleaned via self::clean(), split into single characters, and
   * every non-ASCII character is decoded to its code point. The replacement is
   * looked up in a per-"bank" table (code point >> 8) that is loaded on demand
   * from "data/xNN.php" next to this file and cached in a static variable.
   *
   * @see    http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
   *
   * @author <[email protected]>
   *
   * @param string $str     UTF-8 string to convert
   * @param string $unknown Character use if character unknown. (default is ?)
   *
   * @return string US-ASCII string
   */
  public static function str_transliterate($str, $unknown = '?')
  {
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str);

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // reset for every character, otherwise the code point of the previous
      // character would be reused for a lead byte that is not decoded below
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223) {
        // 2-byte sequence
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        // 3-byte sequence
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        // 4-byte sequence
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        // 5-byte sequence
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        // 6-byte sequence
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // stray continuation bytes (128-191) and the invalid lead bytes 254 / 255
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // presumably the file fills $UTF8_TO_ASCII[$bank] in this scope - see the guard below
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        }

        if (!isset($UTF8_TO_ASCII[$bank])) {
          // no bank file, or the file did not define this bank
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference to the last element
    unset($c);

    return implode('', $chars);
  }
5060
5061
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split around "words" (runs of characters of the class built
   * by self::rxClass() from $charlist and letters, optionally joined by a dash
   * or an apostrophe), so the words sit at the odd indexes of the split result.
   *
   * @param string $str    The input string.
   * @param int    $format <strong>0</strong> => return a number of words<br />
   *                       <strong>1</strong> => return an array of words
   *                       <strong>2</strong> => return an array of words with word-offset as key
   * @param string $charlist Additional characters which will be considered as part of a word.
   *
   * @return array|float The number of words in the string, or the list of words for format 1 and 2
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $len = count($strParts);

    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $len; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      self::checkForSupport();

      $words = array();
      // character offset of the first word = length of the leading non-word part
      $offset = self::strlen($strParts[0]);
      for ($i = 1; $i < $len; $i += 2) {
        $words[$offset] = $strParts[$i];
        $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
      }

      return $words;
    }

    return ($len - 1) / 2;
  }
5105 11
5106 11
  /**
   * Case-insensitive string comparison.
   *
   * Both strings are case-folded via self::strtocasefold() and then compared with self::strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int Returns < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5118 10
5119
  /**
   * String comparison.
   *
   * Identical strings short-circuit to 0; otherwise both strings are
   * normalized to NFD before they are compared byte-wise.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <strong>< 0</strong> if str1 is less than str2<br />
   *              <strong>> 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   */
  public static function strcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    return strcmp(
        \Normalizer::normalize($str1, \Normalizer::NFD),
        \Normalizer::normalize($str2, \Normalizer::NFD)
    );
  }
5136
5137 10
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      The input string.
   * @param string $charList The characters to stop at.
   * @param int    $offset   Start of the examined part (in characters).
   * @param int    $length   Length of the examined part (in characters).
   *
   * @return int|null Number of characters in front of the first character from
   *                  $charList (the whole length if none is found), or null for an empty $charList.
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only cut the string when a sub-range was really requested
    $rangeRequested = $offset || 2147483647 !== $length;
    $str = $rangeRequested ? (string)self::substr($str, $offset, $length) : (string)$str;

    if (!preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $match)) {
      return self::strlen($str);
    }

    /** @noinspection OffsetOperationsInspection */
    return self::strlen($match[1]);
  }
5166
5167
  /**
   * Makes a UTF-8 string from code points.
   *
   * Every entry is converted with UTF8::chr() and the results are concatenated.
   *
   * @param    array $array Integer or Hexadecimal codepoints
   *
   * @return   string UTF-8 encoded string
   */
  public static function string($array)
  {
    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );
    $characters = array_map($toChar, $array);

    return implode($characters);
  }
5186 1
5187
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The candidates are the keys of self::$bom.
   *
   * @param    string $str The input string.
   *
   * @return   bool True if the string has BOM at the start, False otherwise.
   */
  public static function string_has_bom($str)
  {
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5204
5205
  /**
   * Strip HTML and PHP tags from a string.
   *
   * The string is run through self::clean() first (to remove broken UTF-8) and
   * then handed to the native strip_tags().
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string $str            <p>
   *                               The input string.
   *                               </p>
   * @param string $allowable_tags [optional] <p>
   *                               Tags which should not be stripped.
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
   *                               can not be changed with allowable_tags.
   *                               </p>
   *
   * @return string the stripped string.
   */
  public static function strip_tags($str, $allowable_tags = null)
  {
    // clean broken utf8
    $cleaned = self::clean($str);

    return strip_tags($cleaned, $allowable_tags);
  }
5231
5232
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>
   *                           The string from which to get the position of the first occurrence
   *                           of needle
   *                           </p>
   * @param string  $needle    <p>
   *                           The string to find in haystack
   *                           </p>
   * @param int     $offset    [optional] <p>
   *                           The position in haystack to start searching; null is treated as 0.
   *                           </p>
   * @param string  $encoding  The charset for the "\mb_" function; a boolean is
   *                           accepted as a fallback for old versions and means "UTF-8".
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the first occurrence of needle in the
   *                   haystack, or false if needle is not found or one of the strings is empty.
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();

    // "\mb_stripos()" expects an integer offset, so the default "null" is normalized to 0
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: this is only a fallback for old versions
    if ($encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalizeEncoding($encoding);
    }

    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5281 10
5282 2
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case-insensitive).
   *
   * @param string $str           The haystack.
   * @param string $needle        The string to look for.
   * @param bool   $before_needle Passed through to "\mb_stristr()" as its "before needle" flag.
   *
   * @return false|string The matched part of the haystack, or false when needle is empty or not found.
   */
  public static function stristr($str, $needle, $before_needle = false)
  {
    $needle = (string)$needle;
    if ($needle === '') {
      return false;
    }

    // init
    self::checkForSupport();

    return \mb_stristr($str, $needle, $before_needle, 'UTF-8');
  }
5302 9
5303 9
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       The string being checked for length.
   * @param string  $encoding  Set the charset for e.g. "\mb_" function; a boolean is
   *                           accepted as a fallback for old versions and means "UTF-8".
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string (only applied for "UTF-8")
   *
   * @return int the number of characters in
   *           string str having character encoding
   *           encoding. A multi-byte character is
   *           counted as 1.
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    // INFO: this is only a fallback for old versions
    $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

    // single-byte encodings: the byte length is the character length
    if (in_array($encoding, array('ASCII', 'CP850'))) {
      return strlen($str);
    }

    self::checkForSupport();

    if ($cleanUtf8 === true && $encoding === 'UTF-8') {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
5346
5347
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are case-folded via self::strtocasefold() and then compared with self::strnatcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <strong>< 0</strong> if str1 is less than str2<br />
   *             <strong>> 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5361
5362
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Identical strings short-circuit to 0; otherwise both strings are passed
   * through self::strtonatfold() and compared with the native strnatcmp().
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   *
   * @return int Similar to other string comparison functions, this one returns &lt; 0 if
   * str1 is less than str2; &gt; 0 if str1 is greater than str2, and 0 if they are equal.
   */
  public static function strnatcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5385 10
5386 10
  /**
   * Case-insensitive string comparison of the first n characters
   *
   * Both strings are case-folded via self::strtocasefold() and then compared with self::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     The length of strings to be used in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i> is
   * greater than <i>str2</i>, and 0 if they are equal.
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5411
5412
  /**
   * String comparison of the first n characters
   *
   * Both strings are cut to their first $len characters via self::substr()
   * and the two prefixes are compared with self::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>
   *                     The first string.
   *                     </p>
   * @param string $str2 <p>
   *                     The second string.
   *                     </p>
   * @param int    $len  <p>
   *                     Number of characters to use in the comparison.
   *                     </p>
   *
   * @return int &lt; 0 if <i>str1</i> is less than
   * <i>str2</i>; &gt; 0 if <i>str1</i>
   * is greater than <i>str2</i>, and 0 if they are
   * equal.
   */
  public static function strncmp($str1, $str2, $len)
  {
    $prefix1 = self::substr($str1, 0, $len);
    $prefix2 = self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
5438
5439
  /**
   * Search a string for any of a set of characters
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>
   *                          The string where char_list is looked for.
   *                          </p>
   * @param string $char_list <p>
   *                          This parameter is case sensitive.
   *                          </p>
   *
   * @return string|false a string starting from the character found, or false if it is
   * not found or one of the arguments is empty.
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m)) {
      return false;
    }

    // byte offset of the first matched character; everything from there on is returned
    $start = strpos($haystack, $m[0]);

    return substr($haystack, $start);
  }
5470
5471 15
  /**
5472
   * Find position of first occurrence of string in a string.
5473 15
   *
5474 15
   * @link http://php.net/manual/en/function.mb-strpos.php
5475
   *
5476
   * @param string  $haystack     <p>
5477
   *                              The string being checked.
5478
   *                              </p>
5479
   * @param string  $needle       <p>
5480
   *                              The position counted from the beginning of haystack.
5481
   *                              </p>
5482
   * @param int     $offset       [optional] <p>
5483
   *                              The search offset. If it is not specified, 0 is used.
5484
   *                              </p>
5485
   * @param string  $encoding
5486
   * @param boolean $cleanUtf8    Clean non UTF-8 chars from the string.
5487
   *
5488
   * @return int The numeric position of the first occurrence of needle in the haystack string.<br />
5489
   *             If needle is not found it returns false.
5490
   */
5491
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // iconv and mbstring do not support an integer $needle, so a non-negative
    // integer is converted into a character via self::chr().
    // This has to happen BEFORE the string cast below: once $needle is a string
    // the strict type comparison can never be true and the conversion is skipped.
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = self::chr($needle);
    }

    $needle = (string)$needle;

    // nothing can be found in an empty haystack / with an empty needle
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strpos returns wrong position if invalid characters are found in $haystack before $needle
      // iconv_strpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {

      // INFO: this is only a fallback for old versions
      // (a boolean in the $encoding slot is replaced by 'UTF-8')
      if ($encoding === true || $encoding === false) {
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalizeEncoding($encoding);
      }

      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['iconv'] === true) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \grapheme_strpos($haystack, $needle, $offset > 0 ? $offset : 0);
    }

    // fallback: skip $offset characters, search byte-wise, then convert the
    // byte position back into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // a negative offset is ignored in this fallback
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
5549
5550
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * This is a thin wrapper around \mb_strrchr(); the encoding name is passed
   * through self::normalizeEncoding() before it is handed over.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         If set to true, all of haystack from the beginning to the
   *                         last occurrence of needle is returned.
   *                         If set to false, all of haystack from the last occurrence
   *                         of needle to the end is returned.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use, 'UTF-8' by default.
   *                         </p>
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    return \mb_strrchr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5585 37
5586 1
  /**
   * Reverses the order of the characters in a string.
   *
   * The string is split into characters with self::split(), the resulting
   * list is reversed and glued back together.
   *
   * @param string $str The input string
   *
   * @return string The string with its characters in reverse sequence
   */
  public static function strrev($str)
  {
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5597
5598
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * This is a thin wrapper around \mb_strrichr(); the encoding name is passed
   * through self::normalizeEncoding() before it is handed over.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string $haystack <p>
   *                         The string from which to get the last occurrence of needle.
   *                         </p>
   * @param string $needle   <p>
   *                         The string to find in haystack.
   *                         </p>
   * @param bool   $part     [optional] <p>
   *                         If set to true, all of haystack from the beginning to the
   *                         last occurrence of needle is returned.
   *                         If set to false, all of haystack from the last occurrence
   *                         of needle to the end is returned.
   *                         </p>
   * @param string $encoding [optional] <p>
   *                         Character encoding name to use, 'UTF-8' by default.
   *                         </p>
   *
   * @return string|false The portion of haystack, or false if needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false, $encoding = 'UTF-8')
  {
    self::checkForSupport();

    return \mb_strrichr($haystack, $needle, $part, self::normalizeEncoding($encoding));
  }
5633
5634
  /**
   * Find the position of the last occurrence of a case-insensitive string.
   *
   * Both arguments are lower-cased with self::strtolower() and the search
   * itself is delegated to self::strrpos().
   *
   * @param string $haystack The string to look in
   * @param string $needle   The string to look for
   * @param int    $offset   (Optional) Number of characters to ignore in the beginning or end
   *
   * @return int|false The position as returned by self::strrpos(), false if needle is not found
   */
  public static function strripos($haystack, $needle, $offset = 0)
  {
    $haystackLower = self::strtolower($haystack);
    $needleLower = self::strtolower($needle);

    return self::strrpos($haystackLower, $needleLower, $offset);
  }
5647
5648
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>
   *                              The string being checked for the last occurrence of needle.
   *                              </p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack.
   *                              A non-negative integer is treated as a code point and
   *                              converted with self::chr().
   *                              </p>
   * @param int        $offset    [optional] May be specified to begin searching an arbitrary number of
   *                              characters into the string. Negative values will stop searching at an
   *                              arbitrary point prior to the end of the string. The value is cast to
   *                              int, so the default null behaves like 0.
   * @param boolean    $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return int|false The numeric position of the last occurrence of needle in the haystack string,
   *                   or false if needle is not found or one of the strings is empty.
   */
  public static function strrpos($haystack, $needle, $offset = null, $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;

    // an integer needle is a code point: turn it into its character first
    $needle = (((int)$needle) === $needle && ($needle >= 0)) ? self::chr($needle) : $needle;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // init
    self::checkForSupport();
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // \mb_strrpos && iconv_strrpos are not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, 'UTF-8');
    }

    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: cut the haystack according to the offset, search byte-wise
    // and translate the byte position back into a character position
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // only a positive offset shifts the result; a negative one merely shortened the haystack
    $shift = $offset > 0 ? $offset : 0;

    return $shift + self::strlen(substr($haystack, 0, $bytePos));
  }
5723
5724 4
  /**
   * Finds the length of the initial segment of a string consisting entirely of
   * characters contained within a given mask.
   *
   * When an offset or a length other than the default is given, only that part
   * of the string (cut with self::substr()) is inspected.
   *
   * @param string $str    The string to examine
   * @param string $mask   The allowed characters, turned into a regex class by self::rxClass()
   * @param int    $offset Start position inside $str
   * @param int    $length Maximum number of characters of $str to examine
   *
   * @return int The result of self::strlen() for the matching prefix, or 0 if nothing matches
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $useWholeString = !$offset && 2147483647 === $length;
    if (!$useWholeString) {
      $str = self::substr($str, $offset, $length);
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5743
5744 1
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * The work is delegated to \grapheme_strstr().
   *
   * @link http://php.net/manual/en/function.grapheme-strstr.php
   *
   * @param string $haystack      <p>
   *                              The input string. Must be valid UTF-8.
   *                              </p>
   * @param string $needle        <p>
   *                              The string to look for. Must be valid UTF-8.
   *                              </p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack before the first
   *                              occurrence of the needle (excluding the needle) is returned.
   *                              </p>
   *
   * @return string|false The portion of string, or FALSE if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false)
  {
    self::checkForSupport();

    $portion = \grapheme_strstr($haystack, $needle, $before_needle);

    return $portion;
  }
5768
5769
  /**
   * Unicode transformation for case-less matching.
   *
   * Steps: replace the keys of self::$commonCaseFold by their values, optionally
   * apply the "caseFolding_full" table loaded via self::getData(), clean the
   * string with self::clean() and finally lower-case it with self::strtolower().
   * The lookup tables are cached in static locals after the first call.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str  The string to fold
   * @param bool   $full Whether the "caseFolding_full" table is applied as well
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true)
  {
    static $foldFrom = null;
    static $foldTo = null;
    static $fullFoldTable = null;

    // build the search / replace lists of the common table only once
    if ($foldFrom === null) {
      $foldFrom = array_keys(self::$commonCaseFold);
      $foldTo = array_values(self::$commonCaseFold);
    }

    $str = str_replace($foldFrom, $foldTo, $str);

    if ($full) {
      // the full table is loaded lazily, on first use
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    return self::strtolower(self::clean($str));
  }
5806
5807
  /**
   * Make a string lowercase.
   *
   * Delegates to \mb_strtolower() with the encoding name normalized by
   * self::normalizeEncoding(). An empty input short-circuits to ''.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string $str      <p>
   *                         The string being lowercased.
   *                         </p>
   * @param string $encoding Character encoding name, 'UTF-8' by default
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    return \mb_strtolower($str, self::normalizeEncoding($encoding));
  }
5834 3
5835 3
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD with \Normalizer and every run of
   * non-spacing marks (\p{Mn}) is removed afterwards.
   *
   * @param string $s The string to transform
   *
   * @return string
   */
  protected static function strtonatfold($s)
  {
    $decomposed = \Normalizer::normalize($s, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5846
5847
  /**
   * Make a string uppercase.
   *
   * With mbstring support \mb_strtoupper() is used; otherwise the string is
   * cleaned with self::clean() and translated through the table returned by
   * self::case_table() (cached in static locals after the first use).
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string $str      <p>
   *                         The string being uppercased.
   *                         </p>
   * @param string $encoding Character encoding name, 'UTF-8' by default
   *                         (only used on the mbstring path)
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8')
  {
    static $upperFrom = null;
    static $upperTo = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if (self::$support['mbstring'] === true) {
      return \mb_strtoupper($str, self::normalizeEncoding($encoding));
    }

    // fallback: build the search / replace lists once, then replace
    if ($upperFrom === null) {
      $table = self::case_table();
      $upperFrom = array_keys($table);
      $upperTo = array_values($table);
    }

    return str_replace($upperFrom, $upperTo, self::clean($str));
  }
5892
5893
  /**
   * Translate characters or replace sub-strings.
   *
   * Called with two arguments, $from is handed to PHP's strtr() unchanged
   * (replace-pairs form). Called with three arguments, $from and $to are
   * split into characters with self::str_split(), truncated to the shorter
   * of both lists and combined into a replace-pairs array.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string       $str  <p>
   *                           The string being translated.
   *                           </p>
   * @param string|array $from <p>
   *                           The characters to replace, or the replace-pairs
   *                           array when $to is omitted.
   *                           </p>
   * @param string|array $to   <p>
   *                           The characters to translate to.
   *                           </p>
   *
   * @return string A copy of str with all occurrences of each character in from
   *                translated to the corresponding character in to.
   */
  public static function strtr($str, $from, $to = INF)
  {
    // INF is the "argument not given" marker
    if (INF === $to) {
      return strtr($str, $from);
    }

    $fromChars = self::str_split($from);
    $toChars = self::str_split($to);

    // surplus characters of the longer list have no counterpart and are dropped
    $pairCount = min(count($fromChars), count($toChars));
    $fromChars = array_slice($fromChars, 0, $pairCount);
    $toChars = array_slice($toChars, 0, $pairCount);

    return strtr($str, array_combine($fromChars, $toChars));
  }
5934 2
5935 2
  /**
   * Return the width of a string, as computed by \mb_strwidth() for UTF-8.
   *
   * @param string $s The string to measure
   *
   * @return int
   */
  public static function strwidth($s)
  {
    // init
    self::checkForSupport();

    $width = \mb_strwidth($s, 'UTF-8');

    return $width;
  }
5949
5950 20
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>
   *                           The string being checked.
   *                           </p>
   * @param int     $start     <p>
   *                           The first position used in str.
   *                           </p>
   * @param int     $length    [optional] <p>
   *                           The maximum length of the returned string;
   *                           null means "up to the end of the string".
   *                           </p>
   * @param string  $encoding  Character encoding name, 'UTF-8' by default
   *                           (only used on the mbstring path)
   * @param boolean $cleanUtf8 Clean non UTF-8 chars from the string
   *
   * @return string|false The portion of str specified by the start and length parameters;
   *                      '' for an empty input and false if a non-zero start is greater
   *                      than the length of the string.
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed for the range check and the default length
    $str_length = ($start || $length === null) ? (int)self::strlen($str) : 0;

    if ($start && $start > $str_length) {
      return false;
    }

    $length = ($length === null) ? $str_length : (int)$length;

    if (self::$support['mbstring'] === true) {
      // INFO: this is only a fallback for old versions
      // (a boolean in the $encoding slot is replaced by 'UTF-8')
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalizeEncoding($encoding);

      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return (string)\grapheme_substr($str, $start, $length);
    }

    // fallback: split into an array of characters, extract the relevant
    // part and join it to make a string again
    $chars = self::split($str);

    return implode('', array_slice($chars, $start, $length));
  }
6027 20
6028 2
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The relevant part of $main_str is extracted with self::substr(); $str is
   * then cut to the same number of characters before both parts are compared
   * with self::strcmp() or self::strcasecmp().
   *
   * @param string  $main_str           The main string being compared.
   * @param string  $str                The secondary string being compared.
   * @param int     $offset             The start position for the comparison. If negative, it starts counting from the
   *                                    end of the string.
   * @param int     $length             The maximum number of characters of main_str (counted from the offset) that
   *                                    take part in the comparison.
   * @param boolean $case_insensitivity If case_insensitivity is TRUE, comparison is case insensitive.
   *
   * @return int The result of self::strcasecmp() / self::strcmp() for the two extracted parts.
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // self::substr() returns false when $offset lies behind the end of the string;
    // cast the results so that the helpers below always receive strings
    $main_str = (string)self::substr($main_str, $offset, $length);
    $str = (string)self::substr($str, 0, self::strlen($main_str));

    return $case_insensitivity === true ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
  }
6048
6049 1
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack <p>
   *                         The string to search in
   *                         </p>
   * @param string $needle   <p>
   *                         The substring to search for
   *                         </p>
   * @param int    $offset   [optional] <p>
   *                         The offset where to start counting
   *                         </p>
   * @param int    $length   [optional] <p>
   *                         The maximum length after the specified offset to search for the
   *                         substring. If omitted (null), the search runs up to the end of
   *                         the haystack.
   *                         </p>
   *
   * @return int|false The number of occurrences; false if haystack or needle is empty
   *                   or if offset plus length is not greater than zero.
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      // range check; a missing length counts as 0 here
      if ((int)$length + $offset <= 0) {
        return false;
      }

      // A missing length must reach self::substr() as null ("up to the end").
      // Casting it to 0 would cut the haystack down to an empty string and
      // make every call with an offset but without a length return 0.
      $haystack = (string)self::substr($haystack, $offset, $length === null ? null : (int)$length);
    }

    self::checkForSupport();

    return \mb_substr_count($haystack, $needle);
  }
6097
6098 14
  /**
   * Replace text within a portion of a string (character based, not byte based).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, the replacement is applied to every element and the
   * other arguments are expanded (or cut) to one value per element.
   *
   * @param string|array   $str         The input string or an array of input strings.
   * @param string|array   $replacement The replacement string(s).
   * @param int|array      $start       The character offset(s) where the replacement starts.
   * @param null|int|array $length      The number of characters to replace. For a single string,
   *                                    null means "as many characters as the string has".
   *
   * @return array|string An array if $str is an array, otherwise the resulting string.
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement: one entry per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: non-integer entries fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        // NOTE(review): a missing length becomes 0 ("insert") for arrays, while it
        // means "up to the end" for a single string - kept as is, confirm intent.
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, element by element
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // single input string: only the first replacement can be used
    if (is_array($replacement)) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // split both strings into arrays of single characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      self::checkForSupport();

      $length = \mb_strlen((string)$str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // implode(glue, pieces): the legacy (pieces, glue) order with a null glue
    // is rejected by PHP 8.
    return implode('', $smatches[0]);
  }
6175
6176 1
  /**
   * Swap the case of every non-whitespace character: upper case becomes lower
   * case and vice versa.
   *
   * @param string $str      The input string.
   * @param string $encoding The encoding handed to the case conversion (normalized first).
   *
   * @return string The string with each character's case swapped.
   */
  public static function swapCase($str, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoding = self::normalizeEncoding($encoding);
    $str = self::clean($str);

    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          // unchanged by strtoupper() => lower it, otherwise use the upper case form
          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6211
6212
  /**
   * Alias of "UTF8::to_ascii()".
   *
   * @param string $s         The input string, e.g. a UTF-8 string.
   * @param string $subst_chr Passed through to "UTF8::to_ascii()".
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?')
  {
    $ascii = self::to_ascii($s, $subst_chr);

    return $ascii;
  }
6224
6225
  /**
   * Alias of "UTF8::to_latin1()".
   *
   * @param string|array $str The input, passed through unchanged.
   *
   * @return string|array
   */
  public static function toLatin1($str)
  {
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
6236
6237
  /**
   * Alias of "UTF8::to_utf8()".
   *
   * @param string|array $str The input, passed through unchanged.
   *
   * @return string|array
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
6248
6249
  /**
   * Convert a string to ASCII by transliterating every multi-byte character.
   *
   * Single-byte characters are kept as they are. For multi-byte characters the
   * following steps are tried in order: iconv() transliteration, the
   * "translit_extra" data table, the first character of the NFD decomposition
   * and finally "UTF8::str_transliterate()".
   *
   * @param string $s         The input string, e.g. a UTF-8 string.
   * @param string $subst_chr Substitution used when no ASCII replacement is found.
   *
   * @return string
   */
  public static function to_ascii($s, $subst_chr = '?')
  {
    static $translitExtra = null;

    $s = (string)$s;

    if (!isset($s[0])) {
      return '';
    }

    $s = self::clean($s);

    // no byte above 0x7F => nothing to transliterate
    if (!preg_match("/[\x80-\xFF]/", $s)) {
      return $s;
    }

    $s = \Normalizer::normalize($s, \Normalizer::NFKC);

    $glibc = 'glibc' === ICONV_IMPL;

    // "s" modifier: "." has to match line breaks too, otherwise they are missing
    // from the character list and therefore from the re-joined result.
    preg_match_all('/./us', $s, $s);

    /** @noinspection AlterInForeachInspection */
    foreach ($s[0] as &$c) {

      // single-byte character: keep as is
      if (!isset($c[1])) {
        continue;
      }

      if ($glibc) {
        $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
      } else {
        $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

        if (is_string($t)) {
          if (!isset($t[0])) {
            $t = '?';
          } elseif (isset($t[1])) {
            // drop the accent marks some iconv implementations put in front
            $t = ltrim($t, '\'`"^~');
          }
        }
      }

      if (!is_string($t)) {
        // iconv() failed: handle it like "no transliteration found" instead of
        // writing false (an empty string) into the result
        $t = '?';
      }

      if ('?' === $t) {

        if ($translitExtra === null) {
          $translitExtra = (array)self::getData('translit_extra');
        }

        if (isset($translitExtra[$c])) {
          $t = $translitExtra[$c];
        } else {
          $t = \Normalizer::normalize($c, \Normalizer::NFD);

          // use the base character of the decomposition if it is ASCII
          if (is_string($t) && isset($t[0]) && $t[0] < "\x80") {
            $t = $t[0];
          } else {
            $t = $subst_chr;
          }
        }
      }

      if ('?' === $t) {
        $t = self::str_transliterate($c, $subst_chr);
      }

      $c = $t;
    }
    unset($c);

    return implode('', $s[0]);
  }
6328
6329
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @param string|array $str The input, passed through unchanged.
   *
   * @return array|string
   */
  public static function to_iso8859($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6340
6341
  /**
   * Alias of "UTF8::to_win1252()".
   *
   * @param string|array $str The input, passed through unchanged.
   *
   * @return string|array
   */
  public static function to_latin1($str)
  {
    $converted = self::to_win1252($str);

    return $converted;
  }
6352
6353
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * After the byte-wise conversion, "\uXXXX" escape sequences and numeric
   * "&#NNNN;" entities (2 - 4 digits) are decoded as well.
   *
   * @param string|array $str Any string or array (arrays are converted element by element).
   *
   * @return string|array The same string (or array), but UTF8 encoded.
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // how many continuation bytes (0x80 - 0xBF) a sequence starting with
        // this byte would need; 0 => the byte cannot start a sequence
        if ($c1 <= "\xdf") {
          $tailLength = 1; // looks like 2 bytes UTF8
        } elseif ($c1 <= "\xef") {
          $tailLength = 2; // looks like 3 bytes UTF8
        } elseif ($c1 <= "\xf7") {
          $tailLength = 3; // looks like 4 bytes UTF8
        } else {
          $tailLength = 0; // doesn't look like UTF8
        }

        $sequence = $c1;
        $isUtf8 = $tailLength > 0;

        for ($j = 1; $j <= $tailLength; $j++) {
          // bytes behind the end of the string count as "\x00" (=> no UTF8)
          $next = $i + $j < $max ? $str[$i + $j] : "\x00";

          if ($next < "\x80" || $next > "\xbf") {
            $isUtf8 = false;
            break;
          }

          $sequence .= $next;
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $tailLength;
        } else { // not valid UTF8 - convert the single byte
          // ">> 6" instead of "/ 64": chr() needs an integer, not a float
          $buf .= (chr(ord($c1) >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= (chr($ordC1 >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    self::checkForSupport();

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
6480 5
6481
  /**
   * Convert a string (or every element of an array) via "UTF8::utf8_decode()".
   *
   * @param  string|array $str The input string or an array of inputs.
   *
   * @return string|array The converted string, or the array with every element converted.
   */
  protected static function to_win1252($str)
  {
    if (!is_array($str)) {
      $str = (string)$str;

      if ($str === '') {
        return '';
      }

      return self::utf8_decode($str);
    }

    // arrays: convert element by element, the keys are kept
    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      $str[$key] = self::to_win1252($value);
    }

    return $str;
  }
6508
6509
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function could only be used for pure 7-bit (ASCII) strings / chars,
   * but checking for ASCII costs more time than it would save here.
   *
   * @param    string $str   The string to be trimmed
   * @param    string $chars Optional characters to be stripped; if omitted (or empty), characters of
   *                         the unicode categories "Z" (separators) and "C" (other / control) are removed
   *
   * @return   string The trimmed string
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $hasCustomChars = $chars !== INF && $chars;

    if ($hasCustomChars) {
      $leftTrimmed = self::ltrim($str, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6537
6538
  /**
   * Makes string's first char uppercase.
   *
   * @param    string $str The input string
   *
   * @return   string The resulting string; an empty string for empty input
   */
  public static function ucfirst($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // NOTE(review): self::substr() is reported to return false on failure, so
    // both parts are cast before they are used as strings.
    $firstChar = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtoupper($firstChar) . $rest;
  }
6549
6550
  /**
   * Alias of "UTF8::ucfirst()".
   *
   * @param string $str The input string.
   *
   * @return string
   */
  public static function ucword($str)
  {
    $result = self::ucfirst($str);

    return $result;
  }
6561
6562
  /**
   * Uppercase the first character of all (space separated) words in the string.
   *
   * The first character of the whole result is always uppercased, even if the
   * first word is listed in $exceptions.
   *
   * @param  string $str        The input string.
   * @param  array  $exceptions Words that are left untouched (exact, case-sensitive match).
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array())
  {
    $str = (string)$str;

    // not "!$str": that would also discard the valid input "0"
    if (!isset($str[0])) {
      return '';
    }

    // init
    $words = explode(' ', $str);
    $newwords = array();
    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word);
      }

      $newwords[] = $word;
    }

    return self::ucfirst(implode(' ', $newwords));
  }
6603 4
6604
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str The (url-encoded) input string.
   *
   * @return string
   */
  public static function urldecode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // url-decode first, then turn "%uXXXX" escapes into "&#xXXXX;" entities
    $decoded = urldecode($str);
    $decoded = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', $decoded);

    $flags = Bootup::is_php('5.4') ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    // the input of to_utf8() is not an array here, so its array branch is never taken
    $utf8 = self::to_utf8($decoded);
    $withoutEntities = self::html_entity_decode($utf8, $flags);
    $rawDecoded = rawurldecode($withoutEntities);

    return (string)self::fix_simple_utf8($rawDecoded);
  }
6644
6645
  /**
   * Return an array that maps url-encoded Windows-1252 bytes ("%20" - "%FF")
   * to the matching UTF-8 characters.
   *
   * Bytes that are undefined in Windows-1252 (%81, %8D, %8F, %90, %9D) map to
   * an empty string.
   *
   * NOTE(review): "%7F", "%A0" and "%AD" stand for non-printing characters
   * (DEL, no-break space, soft hyphen). They are written as escape sequences
   * so that they cannot get lost in an editor - confirm against the callers.
   *
   * @return array Keys are the url-encoded bytes, values the UTF-8 characters.
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F", // DELETE
        '%80' => '€', // EURO SIGN (was mapped to a backtick by mistake)
        '%81' => '', // undefined in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // undefined in Windows-1252
        '%8E' => 'Ž',
        '%8F' => '', // undefined in Windows-1252
        '%90' => '', // undefined in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // undefined in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xC2\xA0", // NO-BREAK SPACE
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD", // SOFT HYPHEN
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6881
6882
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through "UTF8::to_utf8()", then the characters
   * listed in the "$utf8ToWin1252" table are replaced before the final decoding.
   *
   * @param string $str The input string.
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // search / replace lists, built once from the lookup table
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // init
    self::checkForSupport();

    $str = self::to_utf8($str);

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $prepared = str_replace($search, $replace, $str);

    return Xml::utf8_decode($prepared);
  }
6912
6913
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * If the encoded result contains the byte 0xC2, the replacements from the
   * "$cp1252ToUtf8" table are applied afterwards.
   *
   * @param string $str The input string.
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // search / replace lists, built once from the lookup table
    static $search = null;
    static $replace = null;

    $str = \utf8_encode($str);

    // without a 0xC2 byte there is nothing the table could replace
    if (strpos($str, "\xC2") === false) {
      return $str;
    }

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $str);
  }
6939
6940
  /**
   * fix -> utf8-win1252 chars
   *
   * If you received an UTF-8 string that was converted from Windows-1252 as if it was
   * ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F), use this function to fix it.
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   *
   * @param   string $str The input string.
   *
   * @return  string
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6957
6958
  /**
   * Returns the table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array an array with all known whitespace characters as values and the type of whitespace as keys
   *         as defined in above URL
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6972
6973
  /**
   * Limit the number of words in a string.
   *
   * @param  string $str      The input string.
   * @param  int    $words    Maximum number of words to keep; values below 1 give an empty string.
   * @param  string $strAddOn Appended to the result if the string was shortened.
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $words = (int)$words;

    if ($words < 1) {
      return '';
    }

    // leading whitespace plus up to $words "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    $wasShortened = isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0]);

    if ($wasShortened) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7008
7009
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is turned into a single-byte "mask" (one byte per character) that
   * is wrapped with the native wordwrap(); the positions of the line breaks are
   * then applied to the list of the original characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>
   *                      The input string.
   *                      </p>
   * @param int    $width [optional] <p>
   *                      The column width.
   *                      </p>
   * @param string $break [optional] <p>
   *                      The line is broken using the optional
   *                      break parameter.
   *                      </p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string the given string wrapped at the specified column.
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    $segments = explode($break, $str);

    if (1 === count($segments) && '' === $segments[0]) {
      return '';
    }

    // $glyphs: the original characters (an existing break is one entry),
    // $mask:   one byte per entry: "#" = break, " " = space, "?" = anything else
    $glyphs = array();
    $mask = '';

    foreach ($segments as $index => $segment) {

      if ($index) {
        $glyphs[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $mask .= ' ' === $glyph ? ' ' : '?';
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $glyphIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
      // copy everything in front of the break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $glyphs[$glyphIndex];
        unset($glyphs[$glyphIndex++]);
      }

      // the break replaces an original break or a space
      if ($break === $glyphs[$glyphIndex] || ' ' === $glyphs[$glyphIndex]) {
        unset($glyphs[$glyphIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $glyphs);
  }
7090
7091
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return   array An array with numeric code point as key and White Space Character as value.
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7100
7101
}
7102