Completed
Push — master ( 917948...b1c244 )
by Lars
30:27 queued 15:29
created

UTF8::hex_to_chr()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
crap 1
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
93
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
94
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
96
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
98
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
99
  );
100
101
  /**
102
   * Numeric code point => UTF-8 Character
103
   *
104
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
105
   *
106
   * @var array
107
   */
108
  private static $whitespace = array(
109
    // NUL Byte
110
    0     => "\x0",
111
    // Tab
112
    9     => "\x9",
113
    // New Line
114
    10    => "\xa",
115
    // Vertical Tab
116
    11    => "\xb",
117
    // Carriage Return
118
    13    => "\xd",
119
    // Ordinary Space
120
    32    => "\x20",
121
    // NO-BREAK SPACE
122
    160   => "\xc2\xa0",
123
    // OGHAM SPACE MARK
124
    5760  => "\xe1\x9a\x80",
125
    // MONGOLIAN VOWEL SEPARATOR
126
    6158  => "\xe1\xa0\x8e",
127
    // EN QUAD
128
    8192  => "\xe2\x80\x80",
129
    // EM QUAD
130
    8193  => "\xe2\x80\x81",
131
    // EN SPACE
132
    8194  => "\xe2\x80\x82",
133
    // EM SPACE
134
    8195  => "\xe2\x80\x83",
135
    // THREE-PER-EM SPACE
136
    8196  => "\xe2\x80\x84",
137
    // FOUR-PER-EM SPACE
138
    8197  => "\xe2\x80\x85",
139
    // SIX-PER-EM SPACE
140
    8198  => "\xe2\x80\x86",
141
    // FIGURE SPACE
142
    8199  => "\xe2\x80\x87",
143
    // PUNCTUATION SPACE
144
    8200  => "\xe2\x80\x88",
145
    // THIN SPACE
146
    8201  => "\xe2\x80\x89",
147
    //HAIR SPACE
148
    8202  => "\xe2\x80\x8a",
149
    // LINE SEPARATOR
150
    8232  => "\xe2\x80\xa8",
151
    // PARAGRAPH SEPARATOR
152
    8233  => "\xe2\x80\xa9",
153
    // NARROW NO-BREAK SPACE
154
    8239  => "\xe2\x80\xaf",
155
    // MEDIUM MATHEMATICAL SPACE
156
    8287  => "\xe2\x81\x9f",
157
    // IDEOGRAPHIC SPACE
158
    12288 => "\xe3\x80\x80",
159
  );
160
161
  /**
162
   * @var array
163
   */
164
  private static $whitespaceTable = array(
165
      'SPACE'                     => "\x20",
166
      'NO-BREAK SPACE'            => "\xc2\xa0",
167
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
168
      'EN QUAD'                   => "\xe2\x80\x80",
169
      'EM QUAD'                   => "\xe2\x80\x81",
170
      'EN SPACE'                  => "\xe2\x80\x82",
171
      'EM SPACE'                  => "\xe2\x80\x83",
172
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
173
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
174
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
175
      'FIGURE SPACE'              => "\xe2\x80\x87",
176
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
177
      'THIN SPACE'                => "\xe2\x80\x89",
178
      'HAIR SPACE'                => "\xe2\x80\x8a",
179
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
180
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
181
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
182
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
183
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
184
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
185
  );
186
187
  /**
188
   * bidirectional text chars
189
   *
190
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
191
   *
192
   * @var array
193
   */
194
  private static $bidiUniCodeControlsTable = array(
195
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
196
    8234 => "\xE2\x80\xAA",
197
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
198
    8235 => "\xE2\x80\xAB",
199
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
200
    8236 => "\xE2\x80\xAC",
201
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
202
    8237 => "\xE2\x80\xAD",
203
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
204
    8238 => "\xE2\x80\xAE",
205
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
206
    8294 => "\xE2\x81\xA6",
207
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
208
    8295 => "\xE2\x81\xA7",
209
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
210
    8296 => "\xE2\x81\xA8",
211
    // POP DIRECTIONAL ISOLATE
212
    8297 => "\xE2\x81\xA9",
213
  );
214
215
  /**
216
   * @var array
217
   */
218
  private static $commonCaseFold = array(
219
      'ſ'            => 's',
220
      "\xCD\x85"     => 'ι',
221
      'ς'            => 'σ',
222
      "\xCF\x90"     => 'β',
223
      "\xCF\x91"     => 'θ',
224
      "\xCF\x95"     => 'φ',
225
      "\xCF\x96"     => 'π',
226
      "\xCF\xB0"     => 'κ',
227
      "\xCF\xB1"     => 'ρ',
228
      "\xCF\xB5"     => 'ε',
229
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
230
      "\xE1\xBE\xBE" => 'ι',
231
  );
232
233
  /**
234
   * @var array
235
   */
236
  private static $brokenUtf8ToUtf8 = array(
237
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
238
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
239
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
240
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
241
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
242
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
243
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
244
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
245
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
246
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
247
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
248
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
249
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
250
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
251
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
252
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
253
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
254
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
255
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
256
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
257
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
258
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
259
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
260
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
261
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
262
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
263
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
264
      'ü'       => 'ü',
265
      'ä'       => 'ä',
266
      'ö'       => 'ö',
267
      'Ö'       => 'Ö',
268
      'ß'       => 'ß',
269
      'Ã '       => 'à',
270
      'á'       => 'á',
271
      'â'       => 'â',
272
      'ã'       => 'ã',
273
      'ù'       => 'ù',
274
      'ú'       => 'ú',
275
      'û'       => 'û',
276
      'Ù'       => 'Ù',
277
      'Ú'       => 'Ú',
278
      'Û'       => 'Û',
279
      'Ü'       => 'Ü',
280
      'ò'       => 'ò',
281
      'ó'       => 'ó',
282
      'ô'       => 'ô',
283
      'è'       => 'è',
284
      'é'       => 'é',
285
      'ê'       => 'ê',
286
      'ë'       => 'ë',
287
      'À'       => 'À',
288
      'Á'       => 'Á',
289
      'Â'       => 'Â',
290
      'Ã'       => 'Ã',
291
      'Ä'       => 'Ä',
292
      'Ã…'       => 'Å',
293
      'Ç'       => 'Ç',
294
      'È'       => 'È',
295
      'É'       => 'É',
296
      'Ê'       => 'Ê',
297
      'Ë'       => 'Ë',
298
      'ÃŒ'       => 'Ì',
299
      'Í'       => 'Í',
300
      'ÃŽ'       => 'Î',
301
      'Ï'       => 'Ï',
302
      'Ñ'       => 'Ñ',
303
      'Ã’'       => 'Ò',
304
      'Ó'       => 'Ó',
305
      'Ô'       => 'Ô',
306
      'Õ'       => 'Õ',
307
      'Ø'       => 'Ø',
308
      'Ã¥'       => 'å',
309
      'æ'       => 'æ',
310
      'ç'       => 'ç',
311
      'ì'       => 'ì',
312
      'í'       => 'í',
313
      'î'       => 'î',
314
      'ï'       => 'ï',
315
      'ð'       => 'ð',
316
      'ñ'       => 'ñ',
317
      'õ'       => 'õ',
318
      'ø'       => 'ø',
319
      'ý'       => 'ý',
320
      'ÿ'       => 'ÿ',
321
      '€'      => '€',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $utf8ToWin1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $utf8MSWord = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $iconvEncoding = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $support = array();
808
809 1
  /**
810 1
   * __construct()
811
   */
812
  public function __construct()
  {
    // Creating an instance runs the (cached) environment detection,
    // which fills the static self::$support table.
    self::checkForSupport();
  }
816
817
  /**
818
   * Return the character at the specified position: $str[1] like functionality.
819
   *
820 2
   * @param string $str <p>A UTF-8 string.</p>
821
   * @param int    $pos <p>The position of character to return.</p>
822 2
   *
823
   * @return string <p>Single Multi-Byte character.</p>
824
   */
825
  public static function access($str, $pos)
  {
    $string = (string)$str;
    $position = (int)$pos;

    // Neither an empty string nor a negative offset can address a character.
    if ($position < 0 || !isset($string[0])) {
      return '';
    }

    // Delegate to the multi-byte aware substr() for exactly one character.
    return self::substr($string, $position, 1);
  }
840 1
841
  /**
842
   * Prepends UTF-8 BOM character to the string and returns the whole string.
843
   *
844
   * INFO: If BOM already existed there, the Input string is returned.
845
   *
846
   * @param string $str <p>The input string.</p>
847
   *
848
   * @return string <p>The output string that contains BOM.</p>
849
   */
850 1
  public static function add_bom_to_string($str)
  {
    // Anything other than a strict "false" from string_has_bom() means
    // the input is handed back unchanged.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    // No BOM detected: prepend the UTF-8 BOM.
    return self::bom() . $str;
  }
858
859
  /**
860 2
   * Convert binary into a string.
861
   *
862 2
   * @param mixed $bin 1|0
863
   *
864
   * @return string
865
   */
866
  public static function binary_to_str($bin)
  {
    // Empty input (or input without a string offset 0, e.g. an int) yields ''.
    if (!isset($bin[0])) {
      return '';
    }

    // Keep only binary digits. base_convert(), used previously, also skipped
    // every character that is not a valid digit of the source base.
    $bits = (string)preg_replace('/[^01]/', '', (string)$bin);

    // Leading zero bits carry no value; base_convert() dropped them as well.
    $bits = ltrim($bits, '0');
    if ($bits === '') {
      // A value of zero was previously packed as a single NUL byte; keep that.
      return "\x00";
    }

    // Left-pad to whole bytes. The former pack('H*', ...) call padded an
    // odd-length hex string on the RIGHT, so e.g. "1010" (0x0A) became "\xA0".
    $missing = (8 - strlen($bits) % 8) % 8;
    $bits = str_repeat('0', $missing) . $bits;

    // Decode byte by byte instead of converting the whole number at once:
    // base_convert() works on floats internally and loses precision on
    // long inputs, which corrupted longer strings.
    $str = '';
    foreach (str_split($bits, 8) as $byteBits) {
      $str .= chr(bindec($byteBits));
    }

    return $str;
  }
874 1
875
  /**
876 1
   * Returns the UTF-8 Byte Order Mark Character.
877
   *
878
   * INFO: take a look at UTF8::$bom for e.g. UTF-16 and UTF-32 BOM values
879
   *
880
   * @return string UTF-8 Byte Order Mark
881
   */
882
  public static function bom()
  {
    // The three-byte UTF-8 byte order mark (also the first key of self::$bom).
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
886 2
887
  /**
888 1
   * @alias of UTF8::chr_map()
889
   *
890 1
   * @see   UTF8::chr_map()
891 1
   *
892 1
   * @param string|array $callback
893 1
   * @param string       $str
894 1
   *
895 1
   * @return array
896 2
   */
897
  public static function callback($callback, $str)
  {
    // Pure alias: both arguments are forwarded to chr_map() untouched.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
901
902
  /**
903
   * This method will auto-detect your server environment for UTF-8 support.
904
   *
905
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
906
   */
907 9
  public static function checkForSupport()
  {
    // The marker key is present once detection has run; skip the probes then.
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    // Set the marker before probing, exactly as the probes expect to find it.
    self::$support['already_checked_via_portable_utf8'] = true;

    // mbstring  -> http://php.net/manual/en/book.mbstring.php
    self::$support['mbstring'] = self::mbstring_loaded();

    // iconv     -> http://php.net/manual/en/book.iconv.php
    self::$support['iconv'] = self::iconv_loaded();

    // intl      -> http://php.net/manual/en/book.intl.php
    self::$support['intl'] = self::intl_loaded();

    // IntlChar  -> http://php.net/manual/en/class.intlchar.php
    self::$support['intlChar'] = self::intlChar_loaded();

    // PCRE /u   -> http://php.net/manual/en/book.pcre.php
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
929 8
930 6
  /**
931
   * Generates a UTF-8 encoded character from the given code point.
932
   *
933 7
   * INFO: opposite to UTF8::ord()
934 6
   *
935 6
   * @param int    $code_point <p>The code point for which to generate a character.</p>
936
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
937
   *
938 7
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
939 7
   */
940 7
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // Only genuine integers are accepted: "65", 65.0, null, ... all fail
    // the strict comparison against their own int cast.
    $i = (int)$code_point;
    if ($i !== $code_point) {
      return null;
    }

    // A negative number is not a code point. Without this guard the
    // fallback below passed it to chr(), producing a single raw byte
    // (e.g. -1 => "\xFF"), which is invalid UTF-8, and cached that result.
    if ($code_point < 0) {
      return null;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    } elseif (self::$support['intlChar'] === true) {
      // Fast path, only taken when the caller asked for plain UTF-8.
      return \IntlChar::chr($code_point);
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    // The key is built from the untouched code point (before the modulo
    // below) plus the (normalized) target encoding.
    $cacheKey = $code_point . $encoding;
    if (isset($cache[$cacheKey]) === true) {
      return $cache[$cacheKey];
    }

    // Wrap into the 21-bit range, then pick the UTF-8 sequence length.
    $code_point %= 0x200000;

    if (0x80 > $code_point) {
      // 1 byte: 0xxxxxxx
      $str = chr($code_point);
    } elseif (0x800 > $code_point) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = chr(0xC0 | ($code_point >> 6)) .
             chr(0x80 | ($code_point & 0x3F));
    } elseif (0x10000 > $code_point) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = chr(0xE0 | ($code_point >> 12)) .
             chr(0x80 | (($code_point >> 6) & 0x3F)) .
             chr(0x80 | ($code_point & 0x3F));
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = chr(0xF0 | ($code_point >> 18)) .
             chr(0x80 | (($code_point >> 12) & 0x3F)) .
             chr(0x80 | (($code_point >> 6) & 0x3F)) .
             chr(0x80 | ($code_point & 0x3F));
    }

    // The bytes built above are UTF-8; re-encode if another target was requested.
    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $cache[$cacheKey] = $str;

    return $str;
  }
989
990
  /**
   * Runs a callback once for every character of a string.
   *
   * The string is cut into characters with UTF8::split() and the callback
   * is applied to each of them via array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The callback results, one entry per character.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1004 2
1005
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>One strlen() result per character; an empty array for empty input.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Empty input: nothing to split.
    return isset($str[0]) ? array_map('strlen', self::split($str)) : array();
  }
1027
1028 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes are consumed (1 to 4). The payload
   * bits of the first byte and of every following byte are then concatenated
   * into the returned number.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value. Bytes that are missing from an empty or truncated
   *             input are handed to UTF8::ord() as empty strings instead of triggering
   *             an "Uninitialized string offset" error.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // Guard the offset: $char may be empty.
    $code = self::ord(isset($char[0]) ? $char[0] : '');
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx - the sequence may be shorter than its first byte announces.
      $next = isset($char[$i - 1]) ? $char[$i - 1] : '';
      $code = ($code << 6) + (self::ord($next) & ~0x80);
    }

    return $code;
  }
1067
1068
  /**
   * Returns the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The literal entity "&#0;" is treated like an empty string before the
   * code point is looked up.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed on to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $input = ($char === '&#0;') ? '' : $char;
    $code_point = self::ord($input);

    return self::int_to_hex($code_point, $pfix);
  }
1084
1085
  /**
   * Alias of "UTF8::chr_to_decimal()": forwards the argument and returns the result unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1098 12
1099
  /**
   * Cuts a string into chunks and joins them again with a line ending.
   *
   * The chunks come from UTF8::split(); $end is placed between consecutive
   * chunks by implode(), so nothing is appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) used to join the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1112
1113
  /**
   * Strips every byte that is not part of a well-formed UTF-8 sequence, plus optional extras.
   *
   * Steps, in this order: regex-based removal of invalid bytes, removal of the
   * diamond question mark, removal of invisible characters, then the optional
   * whitespace / MS Word / BOM handling selected by the flags.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // Based on http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // Per the original note, that version caused connection reset problems on larger strings;
    // here the repetition is capped at {1,100}.
    // Group 1 holds runs of valid sequences, groups 2 and 3 hold single invalid bytes;
    // replacing every match with "$1" therefore drops the invalid bytes.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($pattern, '$1', $str);

    // Always applied, regardless of the flags.
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    // Optional steps; each flag has to be exactly true.
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1161 1
1162
  /**
   * Repairs simple UTF-8 encoding errors and then sanitizes the string.
   *
   * Runs UTF8::fix_simple_utf8() followed by UTF8::clean() with BOM removal and
   * whitespace normalization switched on, MS Word normalization switched off
   * and non-breaking spaces kept.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if (isset($str[0]) === false) {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $repaired = self::fix_simple_utf8($str);

    // clean(): $remove_bom = true, $normalize_whitespace = true,
    //          $normalize_msword = false, $keep_non_breaking_space = true
    return (string)self::clean($repaired, true, true, false, true);
  }
1189
1190
  /**
   * Turns a string, or an array of strings, into an array of Unicode code points.
   *
   * A string argument is first cut into characters with UTF8::split(); any other
   * argument is handed to array_map() as it is. Every element is then mapped
   * through UTF8::ord().
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $points = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'ord',
        ),
        $chars
    );

    if (!$u_style) {
      return $points;
    }

    // Second pass: integers => "U+xxxx" notation.
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'int_to_hex',
        ),
        $points
    );
  }
1227 1
1228
  /**
   * Counts how often each character occurs in a string.
   *
   * The string is split into single characters via UTF8::split() and the
   * resulting list is tallied with array_count_values().
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1241
1242
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
   * decoded by UTF8::html_decode().
   *
   * @param int $int
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    $entity = '&#' . $int . ';';

    return self::html_decode($entity);
  }
1253
1254
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 string without messing it up.
   *
   * The input is returned unchanged when it or the encoding name is empty, when no source encoding
   * could be detected, when $force is off and the detected encoding already equals the target,
   * or when the conversion fails or yields an empty string.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Deliberately a truthiness check: neither false nor an empty encoding name
    // may reach mb_convert_encoding() as source encoding.
    if (
        $encodingDetected
        &&
        (
            $force === true
            ||
            $encodingDetected !== $encoding
        )
    ) {

      if (
          $encoding === 'UTF-8'
          &&
          (
              $force === true
              || $encodingDetected === 'UTF-8'
              || $encodingDetected === 'WINDOWS-1252'
              || $encodingDetected === 'ISO-8859-1'
          )
      ) {
        return self::to_utf8($str);
      }

      if (
          $encoding === 'ISO-8859-1'
          &&
          (
              $force === true
              || $encodingDetected === 'ISO-8859-1'
              || $encodingDetected === 'UTF-8'
          )
      ) {
        return self::to_iso8859($str);
      }

      if (
          $encoding !== 'UTF-8'
          &&
          $encoding !== 'WINDOWS-1252'
          &&
          self::$support['mbstring'] === false
      ) {
        trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      $strEncoded = \mb_convert_encoding(
          $str,
          $encoding,
          $encodingDetected
      );

      // Explicit check instead of "if ($strEncoded)": a converted result of "0" is falsy
      // in PHP and would otherwise be discarded in favour of the unconverted input.
      if (is_string($strEncoded) && $strEncoded !== '') {
        return $strEncoded;
      }
    }

    return $str;
  }
1344
1345
  /**
1346
   * Reads entire file into a string.
1347
   *
1348
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1349
   *
1350
   * @link http://php.net/manual/en/function.file-get-contents.php
1351
   *
1352
   * @param string        $filename      <p>
1353
   *                                     Name of the file to read.
1354
   *                                     </p>
1355
   * @param int|null      $flags         [optional] <p>
1356
   *                                     Prior to PHP 6, this parameter is called
1357
   *                                     use_include_path and is a bool.
1358
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1359
   *                                     to trigger include path
1360
   *                                     search.
1361
   *                                     </p>
1362 2
   *                                     <p>
1363
   *                                     The value of flags can be any combination of
1364
   *                                     the following flags (with some restrictions), joined with the
1365 2
   *                                     binary OR (|)
1366 2
   *                                     operator.
1367
   *                                     </p>
1368 2
   *                                     <p>
1369 2
   *                                     <table>
1370
   *                                     Available flags
1371
   *                                     <tr valign="top">
1372
   *                                     <td>Flag</td>
1373 2
   *                                     <td>Description</td>
1374 2
   *                                     </tr>
1375
   *                                     <tr valign="top">
1376 2
   *                                     <td>
1377 2
   *                                     FILE_USE_INCLUDE_PATH
1378
   *                                     </td>
1379 2
   *                                     <td>
1380 1
   *                                     Search for filename in the include directory.
1381 1
   *                                     See include_path for more
1382 2
   *                                     information.
1383
   *                                     </td>
1384
   *                                     </tr>
1385
   *                                     <tr valign="top">
1386 2
   *                                     <td>
1387 1
   *                                     FILE_TEXT
1388
   *                                     </td>
1389
   *                                     <td>
1390 1
   *                                     As of PHP 6, the default encoding of the read
1391 1
   *                                     data is UTF-8. You can specify a different encoding by creating a
1392 1
   *                                     custom context or by changing the default using
1393 1
   *                                     stream_default_encoding. This flag cannot be
1394
   *                                     used with FILE_BINARY.
1395 1
   *                                     </td>
1396
   *                                     </tr>
1397
   *                                     <tr valign="top">
1398
   *                                     <td>
1399
   *                                     FILE_BINARY
1400
   *                                     </td>
1401
   *                                     <td>
1402
   *                                     With this flag, the file is read in binary mode. This is the default
1403
   *                                     setting and cannot be used with FILE_TEXT.
1404
   *                                     </td>
1405 1
   *                                     </tr>
1406
   *                                     </table>
1407 1
   *                                     </p>
1408
   * @param resource|null $context       [optional] <p>
1409
   *                                     A valid context resource created with
1410
   *                                     stream_context_create. If you don't need to use a
1411
   *                                     custom context, you can skip this parameter by &null;.
1412
   *                                     </p>
1413
   * @param int|null      $offset        [optional] <p>
1414
   *                                     The offset where the reading starts.
1415
   *                                     </p>
1416
   * @param int|null      $maxlen        [optional] <p>
1417
   *                                     Maximum length of data read. The default is to read until end
1418
   *                                     of file is reached.
1419 9
   *                                     </p>
1420
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1421 9
   *
1422 9
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1423 3
   *                                     or pdf, because they used non default utf-8 chars</p>
1424
   *
1425 3
   * @return string|false <p>The function returns the read data or false on failure.</p>
1426 3
   */
1427 3
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // The native file_get_contents() declares $use_include_path and $offset as
    // non-nullable; forwarding null is deprecated since PHP 8.1. Map "not given"
    // to the values null was coerced to anyway, so the behavior stays the same.
    if ($flags === null) {
      $flags = false;
    }

    if ($offset === null) {
      $offset = 0;
    }

    // Only when the caller supplied no context: build one that carries the http timeout.
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // $maxlen is only forwarded when it is a real integer.
    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    // Optional post-processing: encode to UTF-8 (without forcing), then clean up the result.
    if ($convertToUtf8 === true) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1462 2
1463 8
  /**
   * Tells whether a file begins with a BOM (Byte Order Mark).
   *
   * The whole file is read with the native file_get_contents() and the
   * content is checked by UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    return self::string_has_bom($content);
  }
1474
1475
  /**
   * Normalizes strings (also nested inside arrays and objects) to the given Unicode normalization form.
   *
   * Arrays and objects are walked recursively. For strings, "\r\n" and "\r" become "\n";
   * non-ASCII strings are normalized with \Normalizer, falling back to UTF8::encode('UTF-8', ...)
   * when normalization yields nothing usable. Values of any other type are returned untouched.
   *
   * NOTE(review): the default form 4 is annotated as "n::NFC" in the signature - confirm that it
   * still matches \Normalizer::NFC on the targeted PHP / intl versions.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form passed to \Normalizer::isNormalized() / ::normalize().</p>
   * @param string $leading_combining  <p>Prepended when the result starts with a combining mark (\p{Mn}).</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker so that the isset() check below passes.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($normalized[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1536
1537
  /**
1538
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1539
   *
1540 1
   * Gets a specific external variable by name and optionally filters it
1541
   *
1542 1
   * @link  http://php.net/manual/en/function.filter-input.php
1543 1
   *
1544 1
   * @param int    $type          <p>
1545 1
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1546
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1547
   *                              <b>INPUT_ENV</b>.
1548 1
   *                              </p>
1549
   * @param string $variable_name <p>
1550
   *                              Name of a variable to get.
1551
   *                              </p>
1552
   * @param int    $filter        [optional] <p>
1553
   *                              The ID of the filter to apply. The
1554
   *                              manual page lists the available filters.
1555
   *                              </p>
1556
   * @param mixed  $options       [optional] <p>
1557
   *                              Associative array of options or bitwise disjunction of flags. If filter
1558
   *                              accepts options, flags can be provided in "flags" field of array.
1559 1
   *                              </p>
1560
   *
1561 1
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1562
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1563
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1564
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1565
   * @since 5.2.0
1566
   */
1567 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded to the native function when the caller actually passed it.
    $value = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    // Hand the native result to UTF8::filter().
    return self::filter($value);
  }
1577 7
1578
  /**
1579 7
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1580 7
   *
1581
   * Gets external variables and optionally filters them
1582 7
   *
1583
   * @link  http://php.net/manual/en/function.filter-input-array.php
1584 7
   *
1585 2
   * @param int   $type       <p>
1586
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1587
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1588 7
   *                          <b>INPUT_ENV</b>.
1589 1
   *                          </p>
1590 1
   * @param mixed $definition [optional] <p>
1591 1
   *                          An array defining the arguments. A valid key is a string
1592
   *                          containing a variable name and a valid value is either a filter type, or an array
1593 7
   *                          optionally specifying the filter, flags and options. If the value is an
1594
   *                          array, valid keys are filter which specifies the
1595
   *                          filter type,
1596
   *                          flags which specifies any flags that apply to the
1597
   *                          filter, and options which specifies any options that
1598
   *                          apply to the filter. See the example below for a better understanding.
1599
   *                          </p>
1600
   *                          <p>
1601
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1602
   *                          input array are filtered by this filter.
1603 1
   *                          </p>
1604
   * @param bool  $add_empty  [optional] <p>
1605 1
   *                          Add missing keys as <b>NULL</b> to the return value.
1606
   *                          </p>
1607 1
   *
1608
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1609
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1610 1
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1611 1
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1612
   * fails.
1613 1
   * @since 5.2.0
1614
   */
1615 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With only $type given, the native function is called without a definition.
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    // Hand the native result to UTF8::filter().
    return self::filter($values);
  }
1625
1626
  /**
1627
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1628
   *
1629
   * Filters a variable with a specified filter
1630
   *
1631
   * @link  http://php.net/manual/en/function.filter-var.php
1632 1
   *
1633
   * @param mixed $variable <p>
1634 1
   *                        Value to filter.
1635
   *                        </p>
1636
   * @param int   $filter   [optional] <p>
1637
   *                        The ID of the filter to apply. The
1638 1
   *                        manual page lists the available filters.
1639
   *                        </p>
1640
   * @param mixed $options  [optional] <p>
1641
   *                        Associative array of options or bitwise disjunction of flags. If filter
1642
   *                        accepts options, flags can be provided in "flags" field of array. For
1643
   *                        the "callback" filter, callable type should be passed. The
1644
   *                        callback must accept one argument, the value to be filtered, and return
1645
   *                        the value after filtering/sanitizing it.
1646
   *                        </p>
1647
   *                        <p>
1648
   *                        <code>
1649
   *                        // for filters that accept options, use this format
1650
   *                        $options = array(
1651
   *                        'options' => array(
1652
   *                        'default' => 3, // value to return if the filter fails
1653
   *                        // other options here
1654 1
   *                        'min_range' => 0
1655
   *                        ),
1656 1
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1657 1
   *                        );
1658
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1659
   *                        // for filter that only accept flags, you can pass them directly
1660 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1661
   *                        // for filter that only accept flags, you can also pass as an array
1662 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1663 1
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1664 1
   *                        // callback validate filter
1665 1
   *                        function foo($value)
1666 1
   *                        {
1667 1
   *                        // Expected format: Surname, GivenNames
1668 1
   *                        if (strpos($value, ", ") === false) return false;
1669 1
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1670 1
   *                        $empty = (empty($surname) || empty($givennames));
1671 1
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1672 1
   *                        if ($empty || $notstrings) {
1673
   *                        return false;
1674
   *                        } else {
1675
   *                        return $value;
1676
   *                        }
1677
   *                        }
1678
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1679
   *                        </code>
1680
   *                        </p>
1681
   *
1682
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1683
   * @since 5.2.0
1684
   */
1685 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Hand $options to the native function only when the caller supplied
    // a third argument; otherwise call it with two arguments.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // post-process the native result via self::filter()
    return self::filter($filtered);
  }
1695
1696
  /**
1697
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1698
   *
1699
   * Gets multiple variables and optionally filters them
1700
   *
1701
   * @link  http://php.net/manual/en/function.filter-var-array.php
1702
   *
1703
   * @param array $data       <p>
1704
   *                          An array with string keys containing the data to filter.
1705
   *                          </p>
1706
   * @param mixed $definition [optional] <p>
1707
   *                          An array defining the arguments. A valid key is a string
1708
   *                          containing a variable name and a valid value is either a
1709
   *                          filter type, or an
1710
   *                          array optionally specifying the filter, flags and options.
1711
   *                          If the value is an array, valid keys are filter
1712
   *                          which specifies the filter type,
1713
   *                          flags which specifies any flags that apply to the
1714
   *                          filter, and options which specifies any options that
1715
   *                          apply to the filter. See the example below for a better understanding.
1716
   *                          </p>
1717
   *                          <p>
1718
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1719
   *                          input array are filtered by this filter.
1720
   *                          </p>
1721
   * @param bool  $add_empty  [optional] <p>
1722
   *                          Add missing keys as <b>NULL</b> to the return value.
1723
   *                          </p>
1724
   *
1725
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1726
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1727
   * the variable is not set.
1728
   * @since 5.2.0
1729
   */
1730 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without
    // $definition / $add_empty; otherwise all three are passed through.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // post-process the native result via self::filter()
    return self::filter($filtered);
  }
1740
1741
  /**
1742
   * Check if the number of unicode characters is not more than the specified integer.
1743
   *
1744
   * @param string $str      The original string to be checked.
1745
   * @param int    $box_size The size in number of chars to be checked against string.
1746
   *
1747
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1748
   */
1749
  public static function fits_inside($str, $box_size)
  {
    // length as reported by self::strlen()
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1753
1754 1
  /**
1755 1
   * Try to fix simple broken UTF-8 strings.
1756
   *
1757 1
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1758
   *
1759
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1760
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1761
   * See: http://en.wikipedia.org/wiki/Windows-1252
1762
   *
1763
   * @param string $str <p>The input string</p>
1764
   *
1765
   * @return string
1766
   */
1767 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    // search / replace lists derived from self::$brokenUtf8ToUtf8;
    // built on the first call and reused by later calls
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    // nothing to repair in an empty string
    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
      $fixedSequences = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1786 1
1787
  /**
1788 1
   * Fix a double (or multiple) encoded UTF8 string.
1789 1
   *
1790
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1791
   *
1792 1
   * @return mixed
1793 1
   */
1794
  public static function fix_utf8($str)
  {
    if (!is_array($str)) {
      // decode + re-encode until the value no longer changes
      $previous = '';
      while ($previous !== $str) {
        $previous = $str;
        $decoded = self::utf8_decode($str);
        $str = self::to_utf8($decoded);
      }

      return $str;
    }

    // arrays: fix every element recursively, keys are kept
    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      /** @noinspection OffsetOperationsInspection */
      $str[$key] = self::fix_utf8($value);
    }

    return $str;
  }
1818
1819
  /**
1820
   * Get the text direction ("RTL" or "LTR") of a specific character.
1821
   *
1822
   * @param string $char
1823
   *
1824
   * @return string <p>'RTL' or 'LTR'</p>
1825
   */
1826 2
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      // direction codes from the "IntlChar"-Class
      $ltrCodes = array(0, 11, 12, 20);
      $rtlCodes = array(1, 13, 14, 15, 21);

      $intlDirection = \IntlChar::charDirection($char);

      if (in_array($intlDirection, $ltrCodes, true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, $rtlCodes, true)) {
        return 'RTL';
      }

      // any other code falls through to the manual lookup below
    }

    $c = self::chr_to_decimal($char);

    // everything outside of [0x5be, 0x10b7f] is reported as left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Lookup tables of right-to-left code points:
    // - a plain integer is a single code point (compared strictly)
    // - a two-element array is an inclusive range "array(first, last)"
    if (0x85e >= $c) {

      $rtlTable = array(
          0x5be,
          0x5c0,
          0x5c3,
          0x5c6,
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          0x608,
          0x60b,
          0x60d,
          0x61b,
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          0x710,
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          0x7b1,
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          0x7fa,
          array(0x800, 0x815),
          0x81a,
          0x824,
          0x828,
          array(0x830, 0x83e),
          array(0x840, 0x858),
          0x85e,
      );

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $rtlTable = array(
          0xfb1d,
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          0xfb3e,
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          0x10808,
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          0x1083c,
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          0x1093f,
          0x10a00,
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two tables and not 0x200f
      return 'LTR';

    }

    foreach ($rtlTable as $entry) {
      if (is_array($entry)) {
        if ($entry[0] <= $c && $entry[1] >= $c) {
          return 'RTL';
        }
      } elseif ($entry === $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1938
1939 9
  /**
1940 9
   * get data from "/data/*.php"
1941
   *
1942 9
   * @param string $file
1943 9
   *
1944 9
   * @return bool|string|array|int <p>Will return false on error.</p>
1945 9
   */
1946 9
  private static function getData($file)
  {
    // data files live in the "data" directory next to this class file
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    // the return value is whatever the required file returns
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1956 4
1957 4
  /**
1958
   * alias for "UTF8::string_has_bom()"
1959
   *
1960 4
   * @see UTF8::string_has_bom()
1961
   *
1962
   * @param string $str
1963 9
   *
1964
   * @return bool
1965 9
   *
1966 9
   * @deprecated
1967
   */
1968 7
  public static function hasBom($str)
  {
    // deprecated alias: delegates to string_has_bom()
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1972
1973 4
  /**
1974
   * Converts a hexadecimal-value into an UTF-8 character.
1975 9
   *
1976
   * @param string $hexdec <p>The hexadecimal value.</p>
1977 9
   *
1978
   * @return string|false <p>One single UTF-8 character.</p>
1979
   */
1980 9
  public static function hex_to_chr($hexdec)
  {
    // hexadecimal string -> decimal number -> character
    $decimal = hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
1984 9
1985
  /**
1986 9
   * Converts hexadecimal U+xxxx code point representation to integer.
1987
   *
1988 9
   * INFO: opposite to UTF8::int_to_hex()
1989
   *
1990
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
1991
   *
1992
   * @return int|false <p>The code point, or false on failure.</p>
1993
   */
1994
  public static function hex_to_int($hexdec)
  {
    // reject empty / falsy input right away
    if (!$hexdec) {
      return false;
    }

    // Accepted forms: "\uXXXX", "U+XXXX" or plain "XXXX" with 4 to 6 digits.
    // Only real hexadecimal digits (0-9, a-f) are allowed: intval() stops at
    // the first invalid digit, so e.g. "zzzz" would otherwise yield 0 and
    // "12gh" would yield 18 instead of signalling a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2006
2007
  /**
2008
   * alias for "UTF8::html_entity_decode()"
2009
   *
2010
   * @see UTF8::html_entity_decode()
2011
   *
2012
   * @param string $str
2013
   * @param int    $flags
2014
   * @param string $encoding
2015
   *
2016
   * @return string
2017
   */
2018
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // alias: all arguments are passed through unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2022
2023
  /**
2024
   * Converts a UTF-8 string to a series of HTML numbered entities.
2025
   *
2026
   * INFO: opposite to UTF8::html_decode()
2027
   *
2028
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2029
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2030
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2031
   *
2032
   * @return string <p>HTML numbered entities.</p>
2033
   */
2034
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded: 0x80 leaves ASCII untouched
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The convmap is read in groups of four integers
      // (start code, end code, offset, mask), so it must contain exactly
      // four elements here: a trailing fifth element is invalid and is
      // rejected by PHP 8.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // fallback without "mbstring": encode character by character
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2072
2073
  /**
2074
   * UTF-8 version of html_entity_decode()
2075
   *
2076
   * The reason we are not using html_entity_decode() by itself is because
2077
   * while it is not technically correct to leave out the semicolon
2078
   * at the end of an entity most browsers will still interpret the entity
2079
   * correctly. html_entity_decode() does not convert entities without
2080
   * semicolons, so we are left with our own little solution here. Bummer.
2081
   *
2082
   * Convert all HTML entities to their applicable characters
2083
   *
2084
   * INFO: opposite to UTF8::html_encode()
2085
   *
2086
   * @link http://php.net/manual/en/function.html-entity-decode.php
2087
   *
2088
   * @param string $str      <p>
2089
   *                         The input string.
2090
   *                         </p>
2091
   * @param int    $flags    [optional] <p>
2092
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2093
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2094 2
   *                         <table>
2095
   *                         Available <i>flags</i> constants
2096 2
   *                         <tr valign="top">
2097 1
   *                         <td>Constant Name</td>
2098 1
   *                         <td>Description</td>
2099
   *                         </tr>
2100 2
   *                         <tr valign="top">
2101
   *                         <td><b>ENT_COMPAT</b></td>
2102 2
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2103 1
   *                         </tr>
2104
   *                         <tr valign="top">
2105
   *                         <td><b>ENT_QUOTES</b></td>
2106 2
   *                         <td>Will convert both double and single quotes.</td>
2107 2
   *                         </tr>
2108 2
   *                         <tr valign="top">
2109 2
   *                         <td><b>ENT_NOQUOTES</b></td>
2110 2
   *                         <td>Will leave both double and single quotes unconverted.</td>
2111 1
   *                         </tr>
2112
   *                         <tr valign="top">
2113 1
   *                         <td><b>ENT_HTML401</b></td>
2114 1
   *                         <td>
2115 1
   *                         Handle code as HTML 4.01.
2116 1
   *                         </td>
2117 1
   *                         </tr>
2118 2
   *                         <tr valign="top">
2119
   *                         <td><b>ENT_XML1</b></td>
2120 2
   *                         <td>
2121
   *                         Handle code as XML 1.
2122
   *                         </td>
2123
   *                         </tr>
2124
   *                         <tr valign="top">
2125
   *                         <td><b>ENT_XHTML</b></td>
2126
   *                         <td>
2127
   *                         Handle code as XHTML.
2128
   *                         </td>
2129
   *                         </tr>
2130
   *                         <tr valign="top">
2131
   *                         <td><b>ENT_HTML5</b></td>
2132
   *                         <td>
2133
   *                         Handle code as HTML 5.
2134
   *                         </td>
2135
   *                         </tr>
2136
   *                         </table>
2137
   *                         </p>
2138
   * @param string $encoding [optional] <p>Encoding to use.</p>
2139
   *
2140
   * @return string <p>The decoded string.</p>
2141
   */
2142
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // shorter than four bytes, e.g. "&;" or "&x;": returned unchanged
    if (!isset($str[3])) {
      return $str;
    }

    // no ampersand at all: returned unchanged
    if (strpos($str, '&') === false) {
      return $str;
    }

    // neither a numeric entity prefix nor a semicolon: returned unchanged
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      // ENT_HTML5 is only used when Bootup::is_php('5.4') reports true
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // Converts one decimal entity ("&#NN;") into the target encoding,
    // but keeps entities that would decode to a quote character as they are.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // repeat until a pass no longer changes the string
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // append the missing semicolon to numeric entities that lack one ...
      $withSemicolons = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );

      // ... then let the native function decode the entities
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2207
2208
  /**
2209
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2210
   *
2211
   * @link http://php.net/manual/en/function.htmlentities.php
2212
   *
2213
   * @param string $str           <p>
2214
   *                              The input string.
2215
   *                              </p>
2216
   * @param int    $flags         [optional] <p>
2217
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2218
   *                              invalid code unit sequences and the used document type. The default is
2219
   *                              ENT_COMPAT | ENT_HTML401.
2220
   *                              <table>
2221
   *                              Available <i>flags</i> constants
2222
   *                              <tr valign="top">
2223
   *                              <td>Constant Name</td>
2224
   *                              <td>Description</td>
2225
   *                              </tr>
2226
   *                              <tr valign="top">
2227
   *                              <td><b>ENT_COMPAT</b></td>
2228
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2229
   *                              </tr>
2230
   *                              <tr valign="top">
2231
   *                              <td><b>ENT_QUOTES</b></td>
2232 1
   *                              <td>Will convert both double and single quotes.</td>
2233
   *                              </tr>
2234 1
   *                              <tr valign="top">
2235
   *                              <td><b>ENT_NOQUOTES</b></td>
2236
   *                              <td>Will leave both double and single quotes unconverted.</td>
2237
   *                              </tr>
2238 1
   *                              <tr valign="top">
2239
   *                              <td><b>ENT_IGNORE</b></td>
2240
   *                              <td>
2241
   *                              Silently discard invalid code unit sequences instead of returning
2242
   *                              an empty string. Using this flag is discouraged as it
2243
   *                              may have security implications.
2244
   *                              </td>
2245
   *                              </tr>
2246 1
   *                              <tr valign="top">
2247
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2248 1
   *                              <td>
2249
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2250
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2251
   *                              </td>
2252
   *                              </tr>
2253
   *                              <tr valign="top">
2254
   *                              <td><b>ENT_DISALLOWED</b></td>
2255
   *                              <td>
2256
   *                              Replace invalid code points for the given document type with a
2257
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2258
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2259
   *                              instance, to ensure the well-formedness of XML documents with
2260
   *                              embedded external content.
2261 3
   *                              </td>
2262
   *                              </tr>
2263 3
   *                              <tr valign="top">
2264 3
   *                              <td><b>ENT_HTML401</b></td>
2265
   *                              <td>
2266 3
   *                              Handle code as HTML 4.01.
2267
   *                              </td>
2268 3
   *                              </tr>
2269
   *                              <tr valign="top">
2270
   *                              <td><b>ENT_XML1</b></td>
2271
   *                              <td>
2272
   *                              Handle code as XML 1.
2273
   *                              </td>
2274
   *                              </tr>
2275
   *                              <tr valign="top">
2276
   *                              <td><b>ENT_XHTML</b></td>
2277
   *                              <td>
2278
   *                              Handle code as XHTML.
2279 1
   *                              </td>
2280
   *                              </tr>
2281 1
   *                              <tr valign="top">
2282
   *                              <td><b>ENT_HTML5</b></td>
2283
   *                              <td>
2284
   *                              Handle code as HTML 5.
2285
   *                              </td>
2286
   *                              </tr>
2287
   *                              </table>
2288
   *                              </p>
2289 2
   * @param string $encoding      [optional] <p>
2290
   *                              Like <b>htmlspecialchars</b>,
2291 2
   *                              <b>htmlentities</b> takes an optional third argument
2292
   *                              <i>encoding</i> which defines encoding used in
2293
   *                              conversion.
2294
   *                              Although this argument is technically optional, you are highly
2295
   *                              encouraged to specify the correct value for your code.
2296
   *                              </p>
2297
   * @param bool   $double_encode [optional] <p>
2298
   *                              When <i>double_encode</i> is turned off PHP will not
2299
   *                              encode existing html entities. The default is to convert everything.
2300
   *                              </p>
2301
   *
2302
   *
2303 2
   * @return string the encoded string.
2304
   * </p>
2305 2
   * <p>
2306
   * If the input <i>string</i> contains an invalid code unit
2307
   * sequence within the given <i>encoding</i> an empty string
2308
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2309
   * <b>ENT_SUBSTITUTE</b> flags are set.
2310
   */
2311
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    // first pass: the native function
    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // second pass: collect each distinct character of three or more bytes
    // together with its replacement from html_encode()
    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($str) as $index => $size) {
      if ($size < 3) {
        continue;
      }

      $chr = self::access($str, $index);

      // every distinct character is encoded only once
      if (isset($entities[$chr])) {
        continue;
      }

      $needles[$chr] = $chr;
      $entities[$chr] = self::html_encode($chr);
    }

    return str_replace($needles, $entities, $str);
  }
2339
2340
  /**
2341
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2342
   *
2343
   * INFO: Take a look at "UTF8::htmlentities()"
2344
   *
2345
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2346
   *
2347
   * @param string $str           <p>
2348
   *                              The string being converted.
2349
   *                              </p>
2350
   * @param int    $flags         [optional] <p>
2351
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2352
   *                              invalid code unit sequences and the used document type. The default is
2353
   *                              ENT_COMPAT | ENT_HTML401.
2354
   *                              <table>
2355
   *                              Available <i>flags</i> constants
2356
   *                              <tr valign="top">
2357
   *                              <td>Constant Name</td>
2358
   *                              <td>Description</td>
2359 1
   *                              </tr>
2360
   *                              <tr valign="top">
2361 1
   *                              <td><b>ENT_COMPAT</b></td>
2362
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2363
   *                              </tr>
2364
   *                              <tr valign="top">
2365
   *                              <td><b>ENT_QUOTES</b></td>
2366
   *                              <td>Will convert both double and single quotes.</td>
2367
   *                              </tr>
2368
   *                              <tr valign="top">
2369
   *                              <td><b>ENT_NOQUOTES</b></td>
2370
   *                              <td>Will leave both double and single quotes unconverted.</td>
2371
   *                              </tr>
2372
   *                              <tr valign="top">
2373
   *                              <td><b>ENT_IGNORE</b></td>
2374
   *                              <td>
2375
   *                              Silently discard invalid code unit sequences instead of returning
2376
   *                              an empty string. Using this flag is discouraged as it
2377
   *                              may have security implications.
2378
   *                              </td>
2379
   *                              </tr>
2380
   *                              <tr valign="top">
2381
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2382
   *                              <td>
2383
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2384
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2385
   *                              </td>
2386
   *                              </tr>
2387 1
   *                              <tr valign="top">
2388
   *                              <td><b>ENT_DISALLOWED</b></td>
2389 1
   *                              <td>
2390
   *                              Replace invalid code points for the given document type with a
2391
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2392
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2393
   *                              instance, to ensure the well-formedness of XML documents with
2394
   *                              embedded external content.
2395
   *                              </td>
2396
   *                              </tr>
2397
   *                              <tr valign="top">
2398
   *                              <td><b>ENT_HTML401</b></td>
2399
   *                              <td>
2400
   *                              Handle code as HTML 4.01.
2401 1
   *                              </td>
2402
   *                              </tr>
2403 1
   *                              <tr valign="top">
2404
   *                              <td><b>ENT_XML1</b></td>
2405
   *                              <td>
2406
   *                              Handle code as XML 1.
2407
   *                              </td>
2408
   *                              </tr>
2409
   *                              <tr valign="top">
2410
   *                              <td><b>ENT_XHTML</b></td>
2411
   *                              <td>
2412
   *                              Handle code as XHTML.
2413
   *                              </td>
2414
   *                              </tr>
2415
   *                              <tr valign="top">
2416 16
   *                              <td><b>ENT_HTML5</b></td>
2417
   *                              <td>
2418 16
   *                              Handle code as HTML 5.
2419
   *                              </td>
2420
   *                              </tr>
2421
   *                              </table>
2422
   *                              </p>
2423
   * @param string $encoding      [optional] <p>
2424
   *                              Defines encoding used in conversion.
2425
   *                              </p>
2426
   *                              <p>
2427
   *                              For the purposes of this function, the encodings
2428
   *                              ISO-8859-1, ISO-8859-15,
2429
   *                              UTF-8, cp866,
2430
   *                              cp1251, cp1252, and
2431 28
   *                              KOI8-R are effectively equivalent, provided the
2432
   *                              <i>string</i> itself is valid for the encoding, as
2433 28
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2434
   *                              the same positions in all of these encodings.
2435 28
   *                              </p>
2436 5
   * @param bool   $double_encode [optional] <p>
2437
   *                              When <i>double_encode</i> is turned off PHP will not
2438
   *                              encode existing html entities, the default is to convert everything.
2439 28
   *                              </p>
2440
   *
2441
   * @return string The converted string.
2442
   * </p>
2443
   * <p>
2444
   * If the input <i>string</i> contains an invalid code unit
2445
   * sequence within the given <i>encoding</i> an empty string
2446
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2447
   * <b>ENT_SUBSTITUTE</b> flags are set.
2448
   */
2449 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Only a non-default encoding name is run through normalize_encoding().
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2457 1
2458 1
  /**
2459
   * Checks whether iconv is available on the server.
2460 1
   *
2461
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2462
   */
2463
  public static function iconv_loaded()
  {
    $return = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    if (
        !Bootup::is_php('5.6')
        &&
        function_exists('iconv_set_encoding')
    ) {
      // INFO: "iconv_set_encoding" is deprecated since PHP 5.6, so it is only
      // called when Bootup::is_php('5.6') is false. The function_exists()
      // guard avoids a fatal "undefined function" error when neither the
      // extension nor a polyfill provides it.
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $return;
  }
2478
2479 16
  /**
2480 16
   * alias for "UTF8::decimal_to_chr()"
2481 15
   *
2482 16
   * @see UTF8::decimal_to_chr()
2483 6
   *
2484
   * @param int $int
2485 15
   *
2486
   * @return string
2487
   */
2488
  public static function int_to_chr($int)
  {
    // Pure alias: the argument is forwarded unchanged to decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2492
2493
  /**
2494
   * Converts Integer to hexadecimal U+xxxx code point representation.
2495
   *
2496
   * INFO: opposite to UTF8::hex_to_int()
2497
   *
2498
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2499
   * @param string $pfix [optional]
2500
   *
2501
   * @return string <p>The code point, or empty string on failure.</p>
2502
   */
2503
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain sequence of decimal digits is rejected.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad the hexadecimal form with zeros to at least four digits.
    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
2515
2516
  /**
2517
   * Checks whether intl-char is available on the server.
2518
   *
2519
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2520
   */
2521
  public static function intlChar_loaded()
  {
    // The class lookup is skipped unless Bootup::is_php('7.0') is true.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2525
2526
  /**
2527
   * Checks whether intl is available on the server.
2528
   *
2529
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2530
   */
2531
  public static function intl_loaded()
  {
    // extension_loaded() already returns a boolean.
    $hasIntl = extension_loaded('intl');

    return $hasIntl;
  }
2535
2536 1
  /**
2537
   * alias for "UTF8::is_ascii()"
2538 1
   *
2539
   * @see UTF8::is_ascii()
2540 1
   *
2541
   * @param string $str
2542
   *
2543
   * @return boolean
2544
   *
2545 1
   * @deprecated
2546
   */
2547 1
  public static function isAscii($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2551
2552 1
  /**
2553
   * alias for "UTF8::is_base64()"
2554
   *
2555
   * @see UTF8::is_base64()
2556
   *
2557
   * @param string $str
2558
   *
2559
   * @return bool
2560
   *
2561
   * @deprecated
2562
   */
2563 1
  public static function isBase64($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2567 1
2568
  /**
2569
   * alias for "UTF8::is_binary()"
2570
   *
2571
   * @see UTF8::is_binary()
2572 1
   *
2573 1
   * @param string $str
2574 1
   *
2575 1
   * @return bool
2576 1
   *
2577
   * @deprecated
2578 1
   */
2579
  public static function isBinary($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_binary().
    $result = self::is_binary($str);

    return $result;
  }
2583
2584
  /**
2585
   * alias for "UTF8::is_bom()"
2586
   *
2587
   * @see UTF8::is_bom()
2588
   *
2589
   * @param string $utf8_chr
2590
   *
2591
   * @return boolean
2592
   *
2593 4
   * @deprecated
2594
   */
2595 4
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias: forwards unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2599 4
2600 4
  /**
2601 4
   * alias for "UTF8::is_html()"
2602 4
   *
2603 4
   * @see UTF8::is_html()
2604 4
   *
2605 4
   * @param string $str
2606 4
   *
2607 4
   * @return boolean
2608 2
   *
2609 2
   * @deprecated
2610 4
   */
2611 4
  public static function isHtml($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_html().
    $result = self::is_html($str);

    return $result;
  }
2615 4
2616 4
  /**
2617 4
   * alias for "UTF8::is_json()"
2618 4
   *
2619 4
   * @see UTF8::is_json()
2620 4
   *
2621 4
   * @param string $str
2622 4
   *
2623 3
   * @return bool
2624 3
   *
2625 4
   * @deprecated
2626 4
   */
2627 4
  public static function isJson($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_json().
    $result = self::is_json($str);

    return $result;
  }
2631 2
2632
  /**
2633 3
   * alias for "UTF8::is_utf16()"
2634
   *
2635
   * @see UTF8::is_utf16()
2636
   *
2637 3
   * @param string $str
2638
   *
2639 3
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2640
   *
2641
   * @deprecated
2642
   */
2643
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2647
2648
  /**
2649
   * alias for "UTF8::is_utf32()"
2650
   *
2651
   * @see UTF8::is_utf32()
2652
   *
2653 3
   * @param string $str
2654
   *
2655 3
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2656
   *
2657 3
   * @deprecated
2658
   */
2659 3
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias: forwards unchanged to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2663 3
2664 3
  /**
2665 3
   * alias for "UTF8::is_utf8()"
2666 3
   *
2667 3
   * @see UTF8::is_utf8()
2668 1
   *
2669 1
   * @param string $str
2670 3
   * @param bool   $strict
2671 3
   *
2672 3
   * @return bool
2673
   *
2674 3
   * @deprecated
2675 3
   */
2676 3
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias: both arguments are forwarded unchanged to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2680 3
2681 3
  /**
2682 3
   * Checks if a string is 7 bit ASCII.
2683 1
   *
2684 1
   * @param string $str <p>The string to check.</p>
2685 3
   *
2686 3
   * @return bool <p>
2687 3
   *              <strong>true</strong> if it is ASCII<br />
2688
   *              <strong>false</strong> otherwise
2689 3
   *              </p>
2690 1
   */
2691 1
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // The empty string counts as ASCII.
    if ($str === '') {
      return true;
    }

    // Any byte in the range 0x80-0xFF means the string is not 7 bit ASCII.
    $hasHighByte = preg_match('/[\x80-\xFF]/', $str);

    return !$hasHighByte;
  }
2701
2702
  /**
2703
   * Returns true if the string is base64 encoded, false otherwise.
2704
   *
2705
   * @param string $str <p>The input string.</p>
2706
   *
2707
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2708
   */
2709
  public static function is_base64($str)
  {
    $str = (string)$str;

    // The empty string is never reported as base64.
    if ($str === '') {
      return false;
    }

    // Strict-decode and re-encode: only input that survives the round trip
    // byte-for-byte is accepted.
    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
2723
2724
  /**
2725
   * Check if the input is binary... (this looks like a hack).
2726
   *
2727
   * @param mixed $input
2728
   *
2729
   * @return bool
2730 41
   */
2731
  public static function is_binary($input)
  {
    $input = (string)$input;

    // A non-empty string consisting solely of "0" and "1" characters is
    // reported as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Otherwise the input is binary as soon as it contains a NUL byte.
    //
    // NOTE(review): the former third check,
    // "substr_count($input, '^ -~') / strlen($input) > 0.3", was removed
    // because it could never be true: it counted occurrences of the literal
    // 4-byte string '^ -~', so the ratio was capped at 0.25. It was
    // presumably meant to be a "non-printable characters" heuristic; confirm
    // before re-introducing one, since is_utf16() / is_utf32() depend on
    // this result.
    return substr_count($input, "\x00") > 0;
  }
2748 41
2749 41
  /**
2750
   * Check if the file is binary.
2751
   *
2752 41
   * @param string $file
2753
   *
2754 36
   * @return boolean
2755 41
   */
2756
  public static function is_binary_file($file)
  {
    // Only the first 512 bytes of the file are inspected. When the file
    // cannot be opened or read, an empty block is checked instead.
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false (with a warning), not by
      // throwing; never pass that false on to fread() / fclose().
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Reached e.g. when an error handler converts warnings to exceptions.
      $block = '';
    }

    // Always release the handle, even if reading failed half-way.
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2768
2769 9
  /**
2770 9
   * Checks if the given string is equal to any "Byte Order Mark".
2771 9
   *
2772 9
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2773 16
   *
2774
   * @param string $str <p>The input string.</p>
2775
   *
2776
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2777
   */
2778
  public static function is_bom($str)
  {
    // The known marks are the keys of self::$bom; the comparison is strict,
    // so $str has to be identical to one of those keys.
    $knownBoms = array_keys(self::$bom);

    return in_array($str, $knownBoms, true);
  }
2788 3
2789 3
  /**
2790 3
   * Check if the string contains any html-tags <lall>.
2791 3
   *
2792 3
   * @param string $str <p>The input string.</p>
2793
   *
2794
   * @return boolean
2795
   */
2796 5
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Matches one opening, closing or self-closing tag, optionally carrying
    // attributes with quoted or unquoted values.
    $tagPattern = "/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/";

    return preg_match($tagPattern, $str) === 1;
  }
2815
2816
  /**
2817 33
   * Try to check if "$str" is an json-string.
2818 33
   *
2819 33
   * @param string $str <p>The input string.</p>
2820 33
   *
2821
   * @return bool
2822 33
   */
2823
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only input that decodes to an object, without a decoder error, is accepted.
    return is_object($decoded) && json_last_error() === JSON_ERROR_NONE;
  }
2841 41
2842
  /**
2843 20
   * Check if the string is UTF-16.
2844
   *
2845
   * @param string $str <p>The input string.</p>
2846
   *
2847
   * @return int|false <p>
2848
   *                   <strong>false</strong> if it's not UTF-16,<br />
2849
   *                   <strong>1</strong> for UTF-16LE,<br />
2850
   *                   <strong>2</strong> for UTF-16BE.
2851
   *                   </p>
2852
   */
2853 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined at all.
    if (!self::is_binary($str)) {
      return false;
    }

    // One score per byte order, computed in the order LE, then BE.
    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$decoded) {
        continue;
      }

      // Convert back and forth once more; the candidate only scores when the
      // second decoding is identical to the first.
      $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // The score counts the keys of count_chars($roundTrip) that are
      // strictly found among the values of count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    // A tie (including 0 : 0) means "not UTF-16".
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
2901
2902
  /**
2903
   * Check if the string is UTF-32.
2904
   *
2905
   * @param string $str
2906
   *
2907
   * @return int|false <p>
2908
   *                   <strong>false</strong> if it's not UTF-32,<br />
2909
   *                   <strong>1</strong> for UTF-32LE,<br />
2910
   *                   <strong>2</strong> for UTF-32BE.
2911
   *                   </p>
2912
   */
2913 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined at all.
    if (!self::is_binary($str)) {
      return false;
    }

    // One score per byte order, computed in the order LE, then BE.
    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$decoded) {
        continue;
      }

      // Convert back and forth once more; the candidate only scores when the
      // second decoding is identical to the first.
      $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // The score counts the keys of count_chars($roundTrip) that are
      // strictly found among the values of count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$encoding]++;
        }
      }
    }

    // A tie (including 0 : 0) means "not UTF-32".
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
2961
2962
  /**
2963
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2964
   *
2965 24
   * @see    http://hsivonen.iki.fi/php-utf8/
2966
   *
2967 24
   * @param string $str    <p>The string to be checked.</p>
2968
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2969 24
   *
2970 2
   * @return bool
2971
   */
2972
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is considered valid.
    if (!isset($str[0])) {
      return true;
    }

    // In strict mode, anything detected as UTF-16 or UTF-32 is rejected.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      //
      // NOTE(review): this branch is taken when pcre_utf8_support() is NOT
      // true, which looks inverted for a check that relies on the /u
      // modifier -- confirm against pcre_utf8_support() before changing.
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $tmp = $in;
          $tmp = ($tmp & 0x0000003F) << $shift;
          $mUcs4 |= $tmp;
          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
            * Check for illegal sequences and code points.
            */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }
            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A non-zero state here means the string ended in the middle of a
    // multi-octet sequence (e.g. a lone lead byte), which is not valid UTF-8.
    return $mState === 0;
  }
3106 4
3107 4
  /**
3108 4
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3109 4
   * Decodes a JSON string
3110
   *
3111 4
   * @link http://php.net/manual/en/function.json-decode.php
3112 4
   *
3113 4
   * @param string $json    <p>
3114
   *                        The <i>json</i> string being decoded.
3115 4
   *                        </p>
3116
   *                        <p>
3117 4
   *                        This function only works with UTF-8 encoded strings.
3118
   *                        </p>
3119
   *                        <p>PHP implements a superset of
3120
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3121
   *                        only supports these values when they are nested inside an array or an object.
3122
   *                        </p>
3123
   * @param bool   $assoc   [optional] <p>
3124
   *                        When <b>TRUE</b>, returned objects will be converted into
3125
   *                        associative arrays.
3126
   *                        </p>
3127 13
   * @param int    $depth   [optional] <p>
3128
   *                        User specified recursion depth.
3129 13
   *                        </p>
3130 13
   * @param int    $options [optional] <p>
3131
   *                        Bitmask of JSON decode options. Currently only
3132 13
   *                        <b>JSON_BIGINT_AS_STRING</b>
3133 1
   *                        is supported (default is to cast large integers as floats)
3134 1
   *                        </p>
3135 1
   *
3136
   * @return mixed the value encoded in <i>json</i> in appropriate
3137 13
   * PHP type. Values true, false and
3138
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3139
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3140
   * <i>json</i> cannot be decoded or if the encoded
3141
   * data is deeper than the recursion limit.
3142
   */
3143
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is run through self::filter() before decoding.
    $filtered = self::filter($json);

    // "$options" is only handed to the native json_decode() when
    // Bootup::is_php('5.4') is true.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3155 18
3156
  /**
   * Returns the JSON representation of a value (wrapper around the native "json_encode()").
   *
   * The value is passed through UTF8::filter() before it is encoded.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The value being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>, <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Maximum depth, must be greater than zero. It is only forwarded to the
   *                       native function when Bootup::is_php('5.5') reports support for it.
   *                       </p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // Without the version check passing, the native call is made without "$depth".
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3204
3205
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string, or an empty string for empty input.</p>
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // nothing to lowercase; also avoids calling the helpers with an empty string
    if (!isset($str[0])) {
      return '';
    }

    // UTF8::substr() can return false (e.g. no remainder after the first char);
    // cast explicitly so UTF8::strtolower() always receives a string
    $firstChar = (string)self::substr($str, 0, 1);
    $remainder = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $remainder;
  }
3216
3217
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. When omitted (INF) or empty,
   *                      Unicode separators (\pZ) and control/other characters (\pC) are stripped.
   *                      The string "0" is a valid character list.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP but is a legitimate character to strip,
    // so it must not select the default whitespace pattern
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3240 16
3241
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the highest code point,
   *                or an empty string if the input contains no code points.
   *                </p>
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // native max() cannot handle an empty list
    // (E_WARNING + false before PHP 8, ValueError since PHP 8)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3256
3257 9
  /**
   * Calculates the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if there are none.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3274
3275
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to "UTF-8".
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3290 1
3291 1
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the lowest code point,
   *                or an empty string if the input contains no code points.
   *                </p>
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // native min() cannot handle an empty list
    // (E_WARNING + false before PHP 8, ValueError since PHP 8)
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3306 41
3307
  /**
   * Alias of "UTF8::normalize_encoding()", kept for backward compatibility.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   *
   * @return string|false <p>Whatever "UTF8::normalize_encoding()" returns for the given name.</p>
   *
   * @deprecated
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3322
3323 1
  /**
   * Normalize the encoding-"name" input.
   *
   * The name is upper-cased, stripped of everything except letters and digits and then looked up
   * in a table of known aliases. Unknown names are returned upper-cased. Results are cached per
   * original input for the lifetime of the process.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>
   *                      e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br />
   *                      <strong>false</strong> if the given encoding is empty.
   *                      </p>
   */
  public static function normalize_encoding($encoding)
  {
    static $staticNormalizeEncodingCache = array();

    if (!$encoding) {
      return false;
    }

    // fast paths: already normalized names need no lookup
    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    if (in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($staticNormalizeEncodingCache[$encoding])) {
      return $staticNormalizeEncodingCache[$encoding];
    }

    $encodingOrig = $encoding;
    $encoding = strtoupper($encoding);

    // strip every separator (dashes, underscores AND whitespace), so that
    // e.g. "utf 8", "utf-8" and "utf_8" all end up as the lookup key "UTF8"
    $encodingUpperHelper = preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    $equivalences = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    $staticNormalizeEncodingCache[$encodingOrig] = $encoding;

    return $encoding;
  }
3379 10
3380 10
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the class-level MS Word table is replaced by its mapped value.
   * The search / replace lists are built once and then reused.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string <p>The normalized string, or an empty string for empty input.</p>
   */
  public static function normalize_msword($str)
  {
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // build the lookup lists lazily on first use
    if ($search === null) {
      $search = array_keys(self::$utf8MSWord);
      $replace = array_values(self::$utf8MSWord);
    }

    return str_replace($search, $replace, $str);
  }
3406 1
3407
  /**
   * Normalize the whitespace.
   *
   * Every character of the class-level whitespace table is replaced by a plain space and,
   * unless requested otherwise, the bidirectional control characters are removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache key must be derived from the same strict comparison that decides whether
    // the non-breaking space is dropped; otherwise a truthy non-bool value (e.g. 1) would
    // store the full table in the slot that later "true" calls read from.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $table = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3453 45
3454
  /**
   * Format a number with grouped thousands.
   *
   * @param float  $number        <p>The number being formatted.</p>
   * @param int    $decimals      <p>Number of decimal digits.</p>
   * @param string $dec_point     <p>Separator for the decimal point (may be longer than one byte).</p>
   * @param string $thousands_sep <p>Thousands separator (may be longer than one byte).</p>
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    if (
        isset($thousands_sep[1], $dec_point[1])
        &&
        Bootup::is_php('5.4') === true
    ) {
      // Format with plain ASCII separators first and swap them afterwards.
      // strtr() replaces both placeholders in a single pass, so a "," inside
      // "$dec_point" is not replaced a second time by "$thousands_sep"
      // (which is what a sequential str_replace() would do).
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3491 18
3492
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>
   *                              Encoding of "$chr"; NULL, an empty value or "UTF-8" mean
   *                              that the input already is UTF-8.
   *                              </p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 on empty input or invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // "0" is falsy but a valid character
    if (!$chr && $chr !== '0') {
      return 0;
    }

    // Only convert when a real, non-UTF-8 encoding was given: normalize_encoding()
    // returns false for empty input, which is not a usable source encoding.
    if ($encoding && $encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
      $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $tmpReturn = \IntlChar::ord($chr);
      if ($tmpReturn) {
        return $tmpReturn;
      }
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    if (isset($cache[$chr]) === true) {
      return $cache[$chr];
    }

    $chr_orig = $chr;

    // decode by hand: at most 4 bytes, unpack() yields a 1-based list of byte values
    $bytes = unpack('C*', substr($chr, 0, 4));
    $code = $bytes ? $bytes[1] : 0;

    // 4-byte sequence
    if (0xF0 <= $code && isset($bytes[4])) {
      return $cache[$chr_orig] = (($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // 3-byte sequence
    if (0xE0 <= $code && isset($bytes[3])) {
      return $cache[$chr_orig] = (($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // 2-byte sequence
    if (0xC0 <= $code && isset($bytes[2])) {
      return $cache[$chr_orig] = (($code - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte (or lead byte without the expected continuation bytes)
    return $cache[$chr_orig] = $code;
  }
3552
3553
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never sets variables in the current scope;
   *          the result is only ever written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return bool <p>
   *              <strong>false</strong> if php can't parse the string or the result is empty,
   *              <strong>true</strong> otherwise.
   *              </p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $parsed = \mb_parse_str($str, $result);

    // success requires both: the native call did not fail and something was parsed
    return $parsed !== false && !empty($result);
  }
3580
3581
  /**
   * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * An empty pattern with the modifier is matched against an empty string;
   * errors of that probe are silenced on purpose.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    // 1 => matched; 0 and false (error) both mean "not supported"
    return $matched === 1;
  }
3591 2
3592
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: a string of decimal digits is a decimal
   * code point, a string of hexadecimal digits is a hexadecimal code point, and anything
   * else is treated as a character whose code point is taken via UTF8::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, or an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve both boundaries (start first, then end) to integer code points
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $chars = array();
    foreach (range($bounds[0], $bounds[1]) as $codePoint) {
      $chars[] = self::chr($codePoint);
    }

    return $chars;
  }
3638
3639
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are turned into hexadecimal html entities,
    // so that the entity decoding below can resolve them
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode repeatedly until a pass changes nothing (or only once, if multi-decoding is off)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entitiesDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3689
3690
  /**
   * Alias of "UTF8::remove_bom()", kept for backward compatibility.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::remove_bom()" returns for the given string.</p>
   *
   * @deprecated
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3705
3706
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the class-level BOM table (BOM bytes => byte length) is checked in turn
   * and cut off when the string starts with it.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // Before PHP 8, substr() returns false when nothing is left after the BOM;
        // the cast keeps the documented string return type ('' instead of false).
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3723 6
3724 6
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of consecutive repetitions of a search string is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    // anything that is neither a string nor an array leaves the input untouched
    if (!is_array($what)) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($what as $needle) {
      $runPattern = '/(' . preg_quote($needle, '/') . ')+/';
      $str = preg_replace($runPattern, $needle, $str);
    }

    return $str;
  }
3747
3748
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url-encoded form ("%00" - "%1F") of these characters.</p>
   * @param string $replacement <p>
   *                            The replacement for each match. It should not itself contain
   *                            characters that are removed, because the replacement is repeated
   *                            until nothing matches anymore.
   *                            </p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // The hex digits of a percent-escape are case-insensitive ("%0B" === "%0b"),
      // hence the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat, because removing one sequence can join the surrounding text into a new one
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3781
3782
  /**
   * Replace the diamond question mark (U+FFFD) with the replacement.
   *
   * The string is re-encoded from UTF-8 to UTF-8 via mbstring while the mbstring
   * substitute character is temporarily set to the replacement; afterwards any
   * remaining U+FFFD is replaced as well.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>The replacement; an empty string removes the character.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // mbstring expects the keyword "none" in order to drop the character entirely
    $substitute = ($unknown === '') ? 'none' : $unknown;

    // swap the global substitute character only for the duration of the conversion
    $previousSubstitute = \mb_substitute_character();
    \mb_substitute_character($substitute);
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
    \mb_substitute_character($previousSubstitute);

    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replace = array(
        $unknown,
        $unknown,
    );

    return str_replace($search, $replace, $str);
  }
3821 1
3822
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>
   *                      Optional characters to be stripped. When omitted (INF) or empty,
   *                      Unicode separators (\pZ) and control/other characters (\pC) are stripped.
   *                      The string "0" is a valid character list.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP but is a legitimate character to strip,
    // so it must not select the default whitespace pattern
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3845
3846
  /**
   * Builds a regular-expression fragment that matches any single character of "$s".
   *
   * Characters are collected into one bracket expression; a "-" is moved to the front of
   * it, and characters that are at least three bytes long but are not reported as a single
   * character by UTF8::strlen() become separate alternatives. Results are cached per input.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>Either a bracket expression or a non-capturing group of alternatives.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // index 0 holds the bracket-expression content, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a leading "-" is a literal inside a bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // at most two bytes long: escape regex meta characters
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $return;

    return $return;
  }
3894
3895
  /**
3896
   * WARNING: Echo native UTF8-Support libs, e.g. for debugging.
3897
   */
3898
  public static function showSupport()
  {
    // Fill the support table on first use.
    $alreadyChecked = isset(self::$support['already_checked_via_portable_utf8']);
    if ($alreadyChecked === false) {
      self::checkForSupport();
    }

    // Print every recorded entry, each followed by a newline and a <br>.
    foreach (self::$support as $supportFlag) {
      echo $supportFlag . "\n<br>";
    }
  }
3908
3909
  /**
3910
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
3911 1
   *
3912
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
3913 1
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
3914
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
3915 1
   *
3916
   * @return string <p>The HTML numbered entity.</p>
3917
   */
3918
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    // Nothing to encode.
    if ($char === '') {
      return '';
    }

    // ASCII input is handed back untouched when the caller asked for that.
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // Only non-default encoding names need to be normalized.
    $targetEncoding = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    return '&#' . self::ord($char, $targetEncoding) . ';';
  }
3941
3942
  /**
3943
   * Convert a string to an array of Unicode characters.
3944
   *
3945
   * @param string  $str       <p>The string to split into array.</p>
3946
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
3947 12
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
3948
   *
3949 12
   * @return string[] <p>An array containing chunks of the string.</p>
3950
   */
3951
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return array();
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $chars = array();

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      // With the "u" and "s" modifiers every "." match is one character.
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // Fallback without PCRE UTF-8 support: walk the bytes and group every
      // lead byte with its continuation bytes. Sequences that do not validate
      // are skipped.
      $byteCount = strlen($str);
      $i = 0;

      while ($i < $byteCount) {
        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {
          // single byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$i + 1])) {
          // two-byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];
            $i++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$i + 2])) {
          // three-byte sequence
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];
            $i += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$i + 3])) {
          // four-byte sequence
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80") && (($str[$i + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];
            $i += 3;
          }
        }

        $i++;
      }
    }

    // Chunks longer than one character: glue neighbouring characters together.
    if ($length > 1) {
      $chunks = array();
      foreach (array_chunk($chars, $length) as $group) {
        $chunks[] = implode('', $group);
      }

      return $chunks;
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4028 1
4029 1
  /**
4030 1
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4031 1
   *
4032
   * @param string $str <p>The input string.</p>
4033 1
   *
4034
   * @return false|string <p>
4035 1
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
4036
   *                      otherwise it will return false.
4037
   *                      </p>
4038
   */
4039
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //
    // Each detector is run at most once; its result code is then compared
    // against both byte orders (1 => little endian, 2 => big endian).
    //

    if (self::is_binary($str)) {
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted when converting the input from that encoding to
    // itself leaves the bytes unchanged (compared via their md5 hashes).
    //

    $md5 = md5($str);
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4121
4122
  /**
4123
   * Check if the string ends with the given substring.
4124
   *
4125
   * @param string $haystack <p>The string to search in.</p>
4126
   * @param string $needle   <p>The substring to search for.</p>
4127
   *
4128
   * @return bool
4129 9
   */
4130 View Code Duplication
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or needle never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Compare the needle with the haystack's tail of the same character length.
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
4145
4146
  /**
4147 1
   * Check if the string ends with the given substring, case insensitive.
4148
   *
4149 1
   * @param string $haystack <p>The string to search in.</p>
4150
   * @param string $needle   <p>The substring to search for.</p>
4151
   *
4152
   * @return bool
4153
   */
4154 View Code Duplication
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or needle never counts as a match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Tail of the haystack with the same character length as the needle.
    $tail = self::substr($haystack, -self::strlen($needle));

    // self::substr() can signal failure with false; treat that as "no match"
    // instead of passing a non-string on to the comparison.
    if ($tail === false) {
      return false;
    }

    return self::strcasecmp($tail, $needle) === 0;
  }
4169 12
4170
  /**
4171
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
4172
   *
4173
   * @link  http://php.net/manual/en/function.str-ireplace.php
4174
   *
4175
   * @param mixed $search  <p>
4176
   *                       Every replacement with search array is
4177
   *                       performed on the result of previous replacement.
4178
   *                       </p>
4179
   * @param mixed $replace <p>
4180
   *                       </p>
4181
   * @param mixed $subject <p>
4182 9
   *                       If subject is an array, then the search and
4183
   *                       replace is performed with every entry of
4184 9
   *                       subject, and the return value is an array as
4185 1
   *                       well.
4186
   *                       </p>
4187
   * @param int   $count   [optional] <p>
4188 8
   *                       The number of matched and replaced needles will
4189 2
   *                       be returned in count which is passed by
4190 2
   *                       reference.
4191
   *                       </p>
4192 8
   *
4193 8
   * @return mixed <p>A string or an array of replacements.</p>
4194 1
   */
4195
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Build one case-insensitive, UTF-8 aware pattern per needle.
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // Pattern that can never match, so an empty needle replaces nothing.
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // The replacement is plain text for the caller, but preg_replace() would
    // read "$1" / "\1" in it as back-references. Escape "\" and "$" so the
    // text is inserted literally (e.g. a replacement like '$10').
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $singleReplacement) {
        $replacement[] = addcslashes((string)$singleReplacement, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    // Use a dedicated counter and publish it through the reference parameter.
    $replacements = 0;
    $subject = preg_replace($patterns, $replacement, $subject, -1, $replacements);
    $count = $replacements;

    return $subject;
  }
4213
4214
  /**
4215
   * Check if the string starts with the given substring, case insensitive.
4216
   *
4217
   * @param string $haystack <p>The string to search in.</p>
4218 1
   * @param string $needle   <p>The substring to search for.</p>
4219
   *
4220 1
   * @return bool
4221
   */
4222 View Code Duplication
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or needle never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // A prefix match means the case-insensitive search hits at position 0.
    return self::stripos($haystack, $needle) === 0;
  }
4237 2
4238 2
  /**
4239 2
   * Limit the number of characters in a string, but also after the next word.
4240
   *
4241 2
   * @param string $str
4242 2
   * @param int    $length
4243
   * @param string $strAddOn
4244
   *
4245
   * @return string
4246
   */
4247
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // Short enough already: return unchanged, without the add-on.
    if (self::strlen($str) <= $length) {
      return $str;
    }

    $lastIndex = $length - 1;

    // The character right at the limit is a space: cut directly before it.
    if (self::substr($str, $lastIndex, 1) === ' ') {
      return self::substr($str, 0, $lastIndex) . $strAddOn;
    }

    // Otherwise cut at the limit and drop the last (possibly partial) word.
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $head = implode(' ', $words);

    // Nothing left (no space within the limit): fall back to a hard cut.
    if ($head === '') {
      return self::substr($truncated, 0, $lastIndex) . $strAddOn;
    }

    return $head . $strAddOn;
  }
4278
4279
  /**
4280
   * Pad a UTF-8 string to given length with another string.
4281
   *
4282 2
   * @param string $str        <p>The input string.</p>
4283
   * @param int    $pad_length <p>The length of return string.</p>
4284
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
4285 2
   * @param int    $pad_type   [optional] <p>
4286
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
4287 2
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
4288
   *                           </p>
4289
   *
4290
   * @return string <strong>Returns the padded string</strong>
4291
   */
4292
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    // Padding only happens for a positive integer target length that is not
    // shorter than the input; everything else returns the input unchanged.
    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // An empty pad string cannot fill anything and would otherwise cause a
    // division by zero below.
    if ($ps_length < 1) {
      return $str;
    }

    // Number of characters that have to be added in total.
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // The left side gets the rounded-down half, the right side the
        // rounded-up half. The division must happen before the int cast,
        // otherwise an odd $diff yields a fractional length.
        $repeats = (int)ceil($diff / $ps_length / 2);
        $pre = str_repeat($pad_string, $repeats);
        $pre = self::substr($pre, 0, (int)floor($diff / 2));
        $post = str_repeat($pad_string, $repeats);
        $post = self::substr($post, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4333 7
4334 7
  /**
4335
   * Repeat a string.
4336
   *
4337
   * @param string $str        <p>
4338 7
   *                           The string to be repeated.
4339
   *                           </p>
4340
   * @param int    $multiplier <p>
4341
   *                           Number of times the input string should be
4342
   *                           repeated.
4343
   *                           </p>
4344
   *                           <p>
4345
   *                           multiplier has to be greater than or equal to 0.
4346
   *                           If the multiplier is set to 0, the function
4347
   *                           will return an empty string.
4348
   *                           </p>
4349
   *
4350
   * @return string <p>The repeated string.</p>
4351
   */
4352
  public static function str_repeat($str, $multiplier)
  {
    // Run the input through the class filter first, then repeat it natively.
    $filtered = self::filter($str);

    return \str_repeat($filtered, $multiplier);
  }
4358 2
4359
  /**
4360
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
4361 6
   *
4362
   * Replace all occurrences of the search string with the replacement string
4363
   *
4364
   * @link http://php.net/manual/en/function.str-replace.php
4365 6
   *
4366
   * @param mixed $search  <p>
4367
   *                       The value being searched for, otherwise known as the needle.
4368
   *                       An array may be used to designate multiple needles.
4369
   *                       </p>
4370
   * @param mixed $replace <p>
4371
   *                       The replacement value that replaces found search
4372 6
   *                       values. An array may be used to designate multiple replacements.
4373
   *                       </p>
4374
   * @param mixed $subject <p>
4375
   *                       The string or array being searched and replaced on,
4376
   *                       otherwise known as the haystack.
4377
   *                       </p>
4378
   *                       <p>
4379
   *                       If subject is an array, then the search and
4380
   *                       replace is performed with every entry of
4381
   *                       subject, and the return value is an array as
4382
   *                       well.
4383
   *                       </p>
4384
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4385
   *
4386
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4387 62
   */
4388
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // Plain delegation to the native function; $count is filled by reference.
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
4392 4
4393
  /**
4394
   * Shuffles all the characters in the string.
4395
   *
4396
   * @param string $str <p>The input string</p>
4397 61
   *
4398 2
   * @return string <p>The shuffled string.</p>
4399 61
   */
4400 60
  public static function str_shuffle($str)
  {
    // Shuffle whole characters (not bytes) and glue them back together.
    $characters = self::split($str);
    shuffle($characters);

    return implode('', $characters);
  }
4408 1
4409
  /**
4410
   * Sort all characters according to code points.
4411 61
   *
4412 2
   * @param string $str    <p>A UTF-8 string.</p>
4413 2
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
4414
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
4415 61
   *
4416
   * @return string <p>String of sorted characters.</p>
4417
   */
4418
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codepoints = self::codepoints($str);

    // Flipping twice removes repeated code points.
    if ($unique) {
      $flipped = array_flip($codepoints);
      $codepoints = array_flip($flipped);
    }

    // Sort by value, descending or ascending.
    if (!$desc) {
      asort($codepoints);
    } else {
      arsort($codepoints);
    }

    return self::string($codepoints);
  }
4434
4435
  /**
4436
   * Split a string into an array.
4437
   *
4438
   * @param string $str
4439
   * @param int    $len
4440
   *
4441
   * @return array
4442
   */
4443
  public static function str_split($str, $len = 1)
  {
    $len = (int)$len;
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // Invalid chunk lengths are delegated to the native function, which
    // reports the error.
    if ($len < 1) {
      return \str_split($str, $len);
    }

    // Split into grapheme clusters.
    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Join every $len consecutive clusters into one chunk.
    $chunks = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $chunks[] = implode('', $group);
    }

    return $chunks;
  }
4479
4480
  /**
4481
   * Check if the string starts with the given substring.
4482
   *
4483
   * @param string $haystack <p>The string to search in.</p>
4484
   * @param string $needle   <p>The substring to search for.</p>
4485 2
   *
4486
   * @return bool
4487 2
   */
4488 2 View Code Duplication
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or needle never counts as a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // A prefix match means the search hits at position 0.
    return self::strpos($haystack, $needle) === 0;
  }
4503 1
4504
  /**
4505 1
   * Get a binary representation of a specific string.
4506 1
   *
4507
   * @param string $str <p>The input string.</p>
4508 1
   *
4509 1
   * @return string
4510
   */
4511
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    // Convert byte by byte. base_convert() on the whole hex dump goes through
    // a float internally and silently loses precision for inputs longer than
    // a few bytes.
    $bits = '';
    $byteCount = strlen($str);
    for ($i = 0; $i < $byteCount; $i++) {
      $bits .= sprintf('%08b', ord($str[$i]));
    }

    // Keep the established output format: no leading zeros, and "0" for
    // empty or all-zero input.
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4519
4520
  /**
4521
   * Convert a string into an array of words.
4522
   *
4523
   * @param string $str
4524
   * @param string $charlist
4525
   *
4526
   * @return array
4527
   */
4528
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    if ($str === '') {
      return array('');
    }

    // Character class for "word" characters: letters plus the extra chars.
    $wordChars = self::rxClass($charlist, '\pL');

    // A word is a run of word characters, optionally joined by a dash or an
    // apostrophe. The words are kept in the result as captured delimiters.
    $pattern = '/(' . $wordChars . '+(?:[\p{Pd}’\']' . $wordChars . '+)*)/u';

    return \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  }
4540 15
4541 2
  /**
4542
   * alias for "UTF8::to_ascii()"
4543
   *
4544
   * @see UTF8::to_ascii()
4545 14
   *
4546
   * @param string $str
4547
   * @param string $unknown
4548
   * @param bool   $strict
4549 14
   *
4550
   * @return string
4551
   */
4552
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // Alias: all arguments are forwarded unchanged to to_ascii().
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4556 2
4557 2
  /**
4558 2
   * Counts number of words in the UTF-8 string.
4559
   *
4560 14
   * @param string $str      <p>The input string.</p>
4561
   * @param int    $format   [optional] <p>
4562
   *                         <strong>0</strong> => return a number of words (default)<br />
4563
   *                         <strong>1</strong> => return an array of words<br />
4564
   *                         <strong>2</strong> => return an array of words with word-offset as key
4565
   *                         </p>
4566 14
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
4567 2
   *
4568 14
   * @return array|int <p>The number of words in the string</p>
4569 14
   */
4570 14
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // str_to_words() alternates non-word / word parts, so the words sit at
    // the odd indexes.
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    // Format 1: plain list of words.
    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // Format 2: words keyed by their character offset in the input.
    if ($format === 2) {
      $words = array();
      $position = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$position] = $parts[$i];
        $position += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    // Default: number of words.
    return ($partCount - 1) / 2;
  }
4600
4601
  /**
4602
   * Case-insensitive string comparison.
4603
   *
4604
   * INFO: Case-insensitive version of UTF8::strcmp()
4605
   *
4606
   * @param string $str1
4607
   * @param string $str2
4608
   *
4609
   * @return int <p>
4610
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
4611
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
4612
   *             <strong>0</strong> if they are equal.
4613
   *             </p>
4614
   */
4615
  public static function strcasecmp($str1, $str2)
  {
    // Case-fold both sides, then compare them case-sensitively.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4619
4620 1
  /**
4621
   * alias for "UTF8::strstr()"
4622 1
   *
4623 1
   * @see UTF8::strstr()
4624 1
   *
4625
   * @param string  $haystack
4626 1
   * @param string  $needle
4627
   * @param bool    $before_needle
4628
   * @param string  $encoding
4629
   * @param boolean $cleanUtf8
4630
   *
4631
   * @return string|false
4632
   */
4633 1
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Alias: all arguments are forwarded unchanged to strstr().
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4637
4638
  /**
4639
   * Case-sensitive string comparison.
4640
   *
4641
   * @param string $str1
4642
   * @param string $str2
4643 4
   *
4644
   * @return int  <p>
4645 4
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
4646
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
4647 4
   *              <strong>0</strong> if they are equal.
4648 2
   *              </p>
4649
   */
4650
  public static function strcmp($str1, $str2)
  {
    // Identical strings need no normalization.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise compare the NFD-normalized forms byte-wise.
    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return \strcmp($normalized1, $normalized2);
  }
4658
4659
  /**
4660
   * Find length of initial segment not matching mask.
4661
   *
4662
   * @param string $str
4663
   * @param string $charList
4664
   * @param int    $offset
4665
   * @param int    $length
4666
   *
4667
   * @return int|null
4668
   */
4669
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    // Without a mask there is nothing to measure against.
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // Narrow the input only when a non-default window was requested.
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // Lazily capture everything before the first character from the mask.
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $match)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($match[1]);
    }

    // No mask character found: the whole string qualifies.
    return self::strlen($str);
  }
4691
4692
  /**
4693
   * alias for "UTF8::stristr()"
4694
   *
4695
   * @see UTF8::stristr()
4696
   *
4697
   * @param string  $haystack
4698
   * @param string  $needle
4699
   * @param bool    $before_needle
4700
   * @param string  $encoding
4701
   * @param boolean $cleanUtf8
4702
   *
4703
   * @return string|false
4704
   */
4705
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Alias: all arguments are forwarded unchanged to stristr().
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4709 1
4710
  /**
4711
   * Create a UTF-8 string from code points.
4712
   *
4713
   * INFO: opposite to UTF8::codepoints()
4714
   *
4715
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
4716
   *
4717
   * @return string <p>UTF-8 encoded string.</p>
4718
   */
4719
  public static function string(array $array)
  {
    // Turn every code point into its character ...
    $characters = array_map(array('\\voku\\helper\\UTF8', 'chr'), $array);

    // ... and concatenate them in the given order.
    return implode('', $characters);
  }
4732
4733 11
  /**
4734 2
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
4735 2
   *
4736
   * @param string $str <p>The input string.</p>
4737 11
   *
4738
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
4739 11
   */
4740 2
  public static function string_has_bom($str)
  {
    // The known BOM byte sequences are the keys of self::$bom.
    $knownBoms = array_keys(self::$bom);

    foreach ($knownBoms as $bom) {
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
4750
4751 10
  /**
   * Remove HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    // the cleanup is opt-in, otherwise the input is handed over untouched
    $input = $cleanUtf8 ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4779
4780
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string   $haystack  <p>
   *                            The string from which to get the position of the first occurrence
   *                            of needle
   *                            </p>
   * @param string   $needle    <p>
   *                            The string to find in haystack
   *                            </p>
   * @param int|null $offset    [optional] <p>
   *                            The position in haystack to start searching,
   *                            "null" is treated like 0.
   *                            </p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found.
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the native functions below expect an integer offset: passing the default "null"
    // through is deprecated since PHP 8.1, so normalize it (null => 0) like UTF8::strpos() does
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding == 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$support['intl'] === true
        &&
        Bootup::is_php('5.4')
    ) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4847 2
4848
  /**
   * Case-insensitive search: returns the part of haystack starting from and including
   * the first occurrence of needle to the end.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned instead.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only a warning: the lookup still continues with the remaining strategies
    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$support['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    // 2nd choice: intl
    if (self::$support['intl'] === true) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // last resort: capture (non-greedy) everything in front of the first needle
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/usi';
    preg_match($pattern, $haystack, $matches);

    if (!isset($matches[1])) {
      return false;
    }

    return $before_needle
        ? $matches[1]
        : self::substr($haystack, self::strlen($matches[1]));
  }
4914
4915
  /**
   * Count the characters of a string (not its bytes).
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // for these encodings the plain byte length is returned directly
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only a warning: the counting still continues with the remaining strategies
    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::strlen() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$support['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    // 2nd choice: iconv (a "false" result falls through to the next strategy)
    if (self::$support['iconv'] === true) {
      $length = \iconv_strlen($str, $encoding);
      if ($length !== false) {
        return $length;
      }
    }

    // 3rd choice: intl (a "null" result falls through to the next strategy)
    if (self::$support['intl'] === true) {
      $length = \grapheme_strlen($str);
      if ($length !== null) {
        return $length;
      }
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $chars);
    $length = count($chars[0]);
    if ($length !== 0) {
      return $length;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str);
  }
4995
4996
  /**
   * Compare two strings case-insensitively using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both sides first, then delegate to the case-sensitive variant
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5012 1
5013 1
  /**
   * Compare two strings using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // shortcut: identical strings (after string conversion) need no folding at all
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5031
5032 1
  /**
   * Compare the first n characters of two strings, ignoring case.
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both sides first, then delegate to the case-sensitive variant
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5049
5050
  /**
   * String comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // UTF8::substr() can hand back "false" instead of a string, so cast the
    // results: UTF8::strcmp() is only meant to receive strings
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5070
5071
  /**
   * Search a string for the first occurrence of any character out of a set.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found.
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    // UTF8::rxClass() supplies the regex fragment built from the character list
    $found = preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $matches);
    if (!$found) {
      // no match (0) or regex failure (false)
      return false;
    }

    // cut the haystack at the byte position of the matched character
    return substr($haystack, strpos($haystack, $matches[0]));
  }
5096 43
5097 2
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string being checked.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle, so a code point is converted
    // into its character here. This has to happen *before* the string cast below,
    // afterwards the integer check could never match (same order as in UTF8::strripos()).
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only a warning: the lookup still continues with the remaining strategies
    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$support['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // a negative offset counts from the end of the haystack: translate it into the
      // absolute start position, so that the returned position is absolute as well
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    // UTF8::substr() can hand back "false" (e.g. offset behind the end), treat that as an empty tail
    $haystackTail = (string)self::substr($haystack, $offset);

    // byte position of the needle inside of the tail
    $pos = strpos($haystackTail, $needle);
    if ($pos === false) {
      return false;
    }

    // skipped characters + number of characters in front of the needle
    return $offset + self::strlen(substr($haystackTail, 0, $pos));
  }
5202
5203
  /**
   * Returns a portion of haystack relative to the last occurrence of needle.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use, "UTF-8" by default.</p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // "UTF-8" is used as it is, everything else gets normalized first
    $encoding = $encoding === 'UTF-8' ? 'UTF-8' : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5242 1
5243 1
  /**
   * Reverse the order of the characters of a string.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // split into single characters via UTF8::split(), flip the list and join it again
    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
5260 6
5261
  /**
   * Returns a portion of haystack relative to the last occurrence of needle, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use, "UTF-8" by default.</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // "UTF-8" is used as it is, everything else gets normalized first
    $encoding = $encoding === 'UTF-8' ? 'UTF-8' : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5299 1
5300
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br />A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // an integer needle is converted via UTF8::chr(),
    // this check has to run before the string cast below
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // only a warning: the lookup still continues with the remaining strategies
    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$support['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // 2nd choice: intl (a "false" result falls through to the vanilla fallback)
    if (self::$support['intl'] === true) {
      $position = \grapheme_strripos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // fallback via vanilla php: upper-case both sides and search case-sensitively
    $haystackUpper = self::strtoupper($haystack, $encoding);
    $needleUpper = self::strtoupper($needle, $encoding);

    return self::strrpos($haystackUpper, $needleUpper, $offset, $encoding, $cleanUtf8);
  }
5377
5378
  /**
   * Locate the last occurrence of a needle inside a haystack.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string that is searched for the last occurrence of the needle.</p>
   * @param string|int $needle    <p>The string to look for.<br />A non-negative int is treated as a code point
   *                              and converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Begin searching this many characters into the string. A negative
   *                              value stops the search that many characters before the end of the string.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />
   *                   <strong>false</strong> if the needle is not found or if haystack / needle is empty.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is interpreted as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // nothing can be found in, or with, an empty string
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: "$encoding === true" is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a boolean encoding is only accepted as a fallback for old versions
    if ($encoding !== 'UTF-8' && !is_bool($encoding)) {
      $encoding = self::normalize_encoding($encoding);
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // first choice: mbstring, a miss falls through to the next strategy
    if (self::$support['mbstring'] === true) {
      $position = \mb_strrpos($haystack, $needle, $offset, $encoding);
      if ($position !== false) {
        return $position;
      }
    }

    // second choice: intl
    if (self::$support['intl'] === true) {
      $position = \grapheme_strrpos($haystack, $needle, $offset);
      if ($position !== false) {
        return $position;
      }
    }

    // fallback via vanilla php

    if ($offset > 0) {
      // search only behind the offset, the offset is added back to the result below
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // drop the tail of the haystack, positions stay relative to the start
      $haystack = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    $bytePosition = strrpos($haystack, $needle);

    // convert the byte position into a character position
    return $bytePosition === false
        ? false
        : $offset + self::strlen(substr($haystack, 0, $bytePosition));
  }
5472
5473
  /**
   * Measure the leading part of a string that is made up only of characters from a given mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start of the examined part, passed on to UTF8::substr().</p>
   * @param int    $length [optional] <p>Length of the examined part, passed on to UTF8::substr().</p>
   *
   * @return int <p>Length of the matching initial segment, 0 if the string or the mask is empty or if nothing
   *             matches.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $offset = (int)$offset;
    $length = (int)$length;

    // only cut the string if a non-default window was requested
    if ($offset !== 0 || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '' || !isset($mask[0])) {
      return 0;
    }

    // anchor a character class built from the mask at the start of the string
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5501
5502
  /**
   * Return the part of the haystack that starts at the first occurrence of the needle.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence of the
   *                               needle is returned instead (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found or if haystack /
   *                      needle is empty.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // the native functions return wrong positions,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // first choice: mbstring, a miss falls through to the next strategy
    if (self::$support['mbstring'] === true) {
      $part = \mb_strstr($haystack, $needle, $before_needle, $encoding);
      if ($part !== false) {
        return $part;
      }
    }

    // second choice: intl
    if (self::$support['intl'] === true) {
      $part = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($part !== false) {
        return $part;
      }
    }

    // last resort: capture everything in front of the first occurrence of the needle
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $leading = $found[1];

    return $before_needle ? $leading : self::substr($haystack, self::strlen($leading));
  }
5574 2
5575 2
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is run through the static [UTF8::$commonCaseFold] table, optionally through the
   * "caseFolding_full" data set, and is finally lowercased via UTF8::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, replace full case folding chars (default)<br />
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // lookup tables, built on first use and kept for later calls
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$commonCaseFold);
      $commonFoldReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 is used as the search list, index 1 as the replacement list
      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5626 6
5627 6
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string  $str       <p>The string being lowercased.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string str with all alphabetic characters converted to lowercase, or an empty string for empty input.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the string is handed to "\mb_strtolower"
      $str = self::clean($str);
    }

    // "UTF-8" is passed through as-is, every other name is normalized first
    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtolower($str, $charset);
  }
5659
5660 19
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and every run of non-spacing marks (\p{Mn}) is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5672
5673
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string  $str       <p>The string being uppercased.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string str with all alphabetic characters converted to uppercase, or an empty string for empty input.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the string is handed to "\mb_strtoupper"
      $str = self::clean($str);
    }

    // "UTF-8" is passed through as-is, every other name is normalized first
    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    return \mb_strtoupper($str, $charset);
  }
5704
5705
  /**
   * Translate characters or replace sub-strings.
   *
   * With three arguments, $from and $to are paired up entry by entry: strings are split into their
   * characters via UTF8::str_split(), arrays are used as ready-made lists. Surplus entries of the longer
   * list are ignored. With two arguments, $from is handed to the native strtr() unchanged and is
   * therefore expected to be an array of "search => replacement" pairs.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // UTF8::str_split() only accepts strings, so arrays must not be passed through it
      $search = is_array($from) ? array_values($from) : self::str_split($from);
      $replace = is_array($to) ? array_values($to) : self::str_split($to);

      // pair the two lists up and ignore the surplus entries of the longer one
      $pairCount = min(count($search), count($replace));

      if ($pairCount === 0) {
        // nothing to translate; array_combine() is skipped because it rejects empty arrays on old PHP versions
        $from = array();
      } else {
        $from = array_combine(
            array_slice($search, 0, $pairCount),
            array_slice($replace, 0, $pairCount)
        );
      }
    }

    return strtr($str, $from);
  }
5738
5739
  /**
   * Return the width of a string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The value reported by "\mb_strwidth".</p>
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // "UTF-8" is passed through as-is, every other name is normalized first
    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding) : $encoding;

    // iconv and mbstring are not tolerant to invalid encoding
    // further, their behaviour is inconsistent with that of PHP's substr
    $text = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($text, $charset);
  }
5763 7
5764
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $start     <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>Returns a sub-string specified by the start and length parameters.<br />
   *                      An empty input yields an empty string, <strong>false</strong> is returned if start
   *                      lies behind the end of the string.</p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed for the range check and for the default length
    $charCount = ($start || $length === null) ? (int)self::strlen($str) : 0;

    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: a boolean encoding is only accepted as a fallback for old versions
    if ($encoding !== 'UTF-8' && !is_bool($encoding)) {
      $encoding = self::normalize_encoding($encoding);
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$support['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$support['iconv'] === true) {
      return \iconv_substr($str, $start, $length);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_substr($str, $start, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    $slice = array_slice($chars, $start, $length);

    return implode('', $slice);
  }
5853
5854
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The compared segment of $main_str is extracted via UTF8::substr(); $str is then shortened to the
   * length of that segment before both are handed to UTF8::strcmp() or UTF8::strcasecmp().
   *
   * @param string  $main_str           <p>The main string being compared.</p>
   * @param string  $str                <p>The secondary string being compared.</p>
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
   *                                    the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                    the length of the str compared to the length of main_str less the offset.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>The result of UTF8::strcasecmp() or UTF8::strcmp() for the two segments.</p>
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // UTF8::substr() returns false if the offset lies behind the end of the string,
    // such a result is treated as an empty segment instead of leaking a boolean into the comparison
    $main_str = (string)self::substr($main_str, $offset, $length);
    $str = (string)self::substr($str, 0, self::strlen($main_str));

    if ($case_insensitivity === true) {
      return self::strcasecmp($main_str, $str);
    }

    return self::strcmp($main_str, $str);
  }
5875
5876
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack  <p>The string to search in.</p>
   * @param string   $needle    <p>The substring to search for.</p>
   * @param int      $offset    [optional] <p>The offset where to start counting.</p>
   * @param int|null $length    [optional] <p>
   *                            The maximum length after the specified offset to search for the
   *                            substring. If omitted (null), the search runs from the offset to the
   *                            end of the haystack.
   *                            </p>
   * @param string   $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The number of occurrences, or false if the haystack or the needle is empty,
   *                   if the offset/length window is rejected, or if the haystack cannot be sliced.
   *                   </p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Compare the length against null explicitly: an explicit length of 0 is a real
    // (empty) window and must not be treated like "no length given".
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // Keep null as null, so that self::substr() reads up to the end of the haystack;
      // casting it to int would turn "no length" into "length 0".
      if ($length !== null) {
        $length = (int)$length;
      }

      $haystack = self::substr($haystack, $offset, $length, $encoding);

      // self::substr() can signal a failure with false; do not pass that on as a haystack
      if ($haystack === false) {
        return false;
      }
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the matches of the quoted needle in UTF-8 mode
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
5950
5951
  /**
   * Strips the prefix ($needle) from the beginning of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>
   *                The haystack without the prefix, or the unchanged haystack
   *                if it does not start with the needle.
   *                </p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to strip from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the haystack untouched
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    return self::substr($haystack, self::strlen($needle));
  }
5979
5980
  /**
   * Strips the suffix ($needle) from the end of the string ($haystack), ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>
   *                The haystack without the suffix, or the unchanged haystack
   *                if it does not end with the needle.
   *                </p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to strip from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the haystack untouched
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything except the trailing characters covered by the needle
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6008
6009
  /**
   * Strips the prefix ($needle) from the beginning of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>
   *                The haystack without the prefix, or the unchanged haystack
   *                if it does not start with the needle.
   *                </p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to strip from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching prefix leaves the haystack untouched
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    return self::substr($haystack, self::strlen($needle));
  }
6037
6038
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $start            <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br /><br />
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given and $str is a string, it defaults to
   *                                          the length of the string, i.e. the replacing ends at the end of string.
   *                                          If it is not given and $str is an array, a length of zero is used for
   *                                          every element. A length of zero has the effect of inserting the
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // Nothing to replace. Without this guard the padded scalar arguments below would
      // still hold one element, and array_map() would invent a result for a missing string.
      if ($num === 0) {
        return array();
      }

      // $replacement: one replacement per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: one offset per input string, non-integer offsets fall back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: one length per input string
      if (!isset($length)) {
        // no length given: every element is handled with a length of 0 (insert)
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            // non-integer lengths fall back to the number of input strings
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, element by element
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    if (is_array($replacement)) {
      // A single input string takes the first replacement in array order, whatever
      // its key is; an empty array means "replace with nothing".
      $replacement = count($replacement) > 0 ? reset($replacement) : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // split both strings into arrays of single UTF-8 characters
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    // replace on character level, then glue the characters together again
    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6135
6136
  /**
   * Strips the suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>
   *                The haystack without the suffix, or the unchanged haystack
   *                if it does not end with the needle.
   *                </p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to strip from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty or non-matching suffix leaves the haystack untouched
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything except the trailing characters covered by the needle
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return self::substr($haystack, 0, $keepLength);
  }
6163 1
6164
  /**
   * Returns the string with the case of every character swapped.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // clean the input before it is matched character by character
      $str = self::clean($str);
    }

    // visit every non-whitespace character and flip its case
    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          // unchanged by upper-casing => lower-case it, otherwise use the upper-case form
          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6207 3
6208
  /**
   * Deprecated alias: forwards all arguments to "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the second argument of "UTF8::to_ascii()".</p>
   * @param bool   $strict    <p>Passed on as the third argument of "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6225
6226 8
  /**
   * Deprecated alias: forwards the argument to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string $str
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6241 6
6242
  /**
   * Deprecated alias: forwards the argument to "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6257
6258 1
  /**
   * Deprecated alias: forwards the argument to "UTF8::to_utf8()".
   *
   * @see UTF8::to_utf8()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6273
6274
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first. Every remaining non-ASCII character is decoded into its
   * code point and looked up in a per-"bank" (code point >> 8) table that is loaded on
   * demand from the "data" directory next to this file.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   *
   * @throws \Exception <p>If $strict is true, but Intl is not supported or PHP is older than 5.4.</p>
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache of the loaded transliteration banks, shared between calls
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
        $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      } else {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }
    }

    // split the string into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Reset the code point for every character: otherwise a character that cannot be
      // decoded below would silently reuse the code point of the previous character.
      $ord = null;

      $ordC1 = ord($c[1]);

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = ord($c[2]);

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = ord($c[3]);

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = ord($c[4]);

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = ord($c[5]);

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // lead bytes 254 and 255 are mapped to the "unknown" character
      if ($ordC0 >= 254 && $ordC0 <= 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be computed for this character
      if (!isset($ord)) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the bank, the low byte selects the entry within the bank
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          // NOTE(review): the included file presumably fills $UTF8_TO_ASCII[$bank] - confirm against the data files
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference, so that later writes to $c cannot alter the last element of $chars
    unset($c);

    return implode('', $chars);
  }
6402
6403
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), the keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str)) {
      // array_map() with a single array preserves the keys
      return array_map(array(__CLASS__, 'to_iso8859'), $str);
    }

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    return self::utf8_decode($input);
  }
6432
6433
  /**
   * Alias: forwards the argument to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6446
6447
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It decodes UTF-8 codepoints and unicode escape sequences.
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // look ahead up to three bytes, "\x00" stands for "past the end of the string"
        $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
        $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
        $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

        // a UTF-8 continuation byte lies in the range 0x80-0xBF
        $c2IsTail = $c2 >= "\x80" && $c2 <= "\xbf";
        $c3IsTail = $c3 >= "\x80" && $c3 <= "\xbf";
        $c4IsTail = $c4 >= "\x80" && $c4 <= "\xbf";

        if ($c1 <= "\xdf" && $c2IsTail) {
          // looks like 2 bytes UTF8 - almost sure it's UTF8 already
          $buf .= $c1 . $c2;
          $i++;
          continue;
        }

        if ($c1 >= "\xe0" && $c1 <= "\xef" && $c2IsTail && $c3IsTail) {
          // looks like 3 bytes UTF8 - almost sure it's UTF8 already
          $buf .= $c1 . $c2 . $c3;
          $i += 2;
          continue;
        }

        if ($c1 >= "\xf0" && $c1 <= "\xf7" && $c2IsTail && $c3IsTail && $c4IsTail) {
          // looks like 4 bytes UTF8 - almost sure it's UTF8 already
          $buf .= $c1 . $c2 . $c3 . $c4;
          $i += 3;
          continue;
        }

        // Not valid UTF8 - convert this single byte into a 2-byte sequence.
        // The integer shift (instead of a division by 64) keeps the argument of chr() an int.
        $buf .= (chr(ord($c1) >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        continue;
      }

      if (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= (chr($ordC1 >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }
        continue;
      }

      // it doesn't need conversion
      $buf .= $c1;
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf, ENT_QUOTES);
    }

    return $buf;
  }
6573
6574
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>
   *                      Optional characters to be stripped. If omitted or empty, separator and
   *                      control characters (\pZ, \pC) are stripped.
   *                      </p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP, but it is a valid character list,
    // so it must not end up in the "no characters given" branch.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6602
6603
  /**
   * Makes string's first char uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Static analysis reports that self::substr() can return false; the casts make sure
    // that only strings reach self::strtoupper() and the concatenation below.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $rest = (string)self::substr($str, 1, null, $encoding, $cleanUtf8);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $rest;
  }
6616
6617
  /**
   * Alias: forwards all arguments to "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
6632
6633
  /**
   * Uppercase for all words in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // test for the empty string explicitly: "0" is falsy in PHP, but it is valid input
    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $newwords = array();
    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // skip empty parts only - a "0" is a regular word and must stay in the result
      if ((string)$word === '') {
        continue;
      }

      // words from the exception list are taken over unchanged
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
6682
6683
  /**
   * Decodes url-encoded strings and html entities, optionally repeating until nothing changes,
   * and repairs simple broken UTF-8 sequences on the way.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten as hexadecimal html entities,
    // so that the entity decoding in the loop below can resolve them.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only requested when Bootup reports PHP 5.4 or newer.
    if (Bootup::is_php('5.4')) {
      $entityFlags = ENT_QUOTES | ENT_HTML5;
    } else {
      $entityFlags = ENT_QUOTES;
    }

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

      // a further pass only happens when this pass changed the string
      $changed = ($previous !== $str);

    } while ($multi_decode === true && $changed);

    return (string)$str;
  }
6733
6734
  /**
   * Returns a lookup table that maps "urlencoded" Windows-1252 bytes ("%20" - "%FF")
   * to their UTF-8 characters.
   *
   * Some keys map to an empty string.
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array <p>The percent-encoded byte as key and the UTF-8 character as value.</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (the backtick is already covered by "%60")
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6972
6973
  /**
   * Converts an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through "UTF8::to_utf8()", then the sequences listed in the
   * "$utf8ToWin1252" table are replaced before the string is handed to "Xml::utf8_decode()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function utf8_decode($str)
  {
    // search / replace lists, derived from the lookup table on first use only
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $normalized = (string)self::to_utf8($str);

    if ($searchCache === null) {
      $searchCache = array_keys(self::$utf8ToWin1252);
      $replaceCache = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($searchCache, $replaceCache, $normalized);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
7002
7003
  /**
   * Converts an ISO-8859-1 string to UTF-8.
   *
   * After the native "\utf8_encode()" call, the sequences listed in the "$cp1252ToUtf8"
   * table are replaced, but only if the encoded string contains a "\xC2" byte.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    // search / replace lists, derived from the lookup table on first use only
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);

    // no "\xC2" byte: the replacement step is skipped entirely
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($searchCache === null) {
      $searchCache = array_keys(self::$cp1252ToUtf8);
      $replaceCache = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($searchCache, $replaceCache, $encoded);
  }
7036
7037
  /**
   * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the input.</p>
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7050
7051
  /**
   * Gives access to the table of UTF-8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               The whitespace characters as values and the type of whitespace as keys,
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
7067
7068
  /**
   * Cuts a string after a given number of words.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Appended to the result when the string was shortened.</p>
   *
   * @return string <p>
   *                An empty string for empty input or a limit below 1, the unchanged input when
   *                nothing had to be cut, otherwise the shortened string followed by $strAddOn.
   *                </p>
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    $limit = (int)$words;

    if ($str === '' || $limit < 1) {
      return '';
    }

    // leading whitespace, then up to $limit runs of "non-whitespace + trailing whitespace"
    $found = array();
    preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $found);

    // shortened means: something matched and the match does not cover the whole string
    $shortened = isset($found[0]) && self::strlen($str) !== self::strlen($found[0]);

    if (!$shortened) {
      return $str;
    }

    return self::rtrim($found[0]) . $strAddOn;
  }
7103
7104
  /**
   * Wraps a string to a given number of characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column, or an empty string when
   *                $str or $break is empty.
   *                </p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ($str === '' || $break === '') {
      return '';
    }

    // $mask gets exactly one ASCII char per entry of $chars: ' ' for a space, '?' for any
    // other character and '#' for an existing $break. The native wordwrap() then works on
    // the mask instead of the (possibly multi-byte) text.
    $mask = '';
    $chars = array();
    $segments = explode($break, $str);
    $segmentCount = count($segments);

    /** @noinspection ForeachInvariantsInspection */
    for ($index = 0; $index < $segmentCount; ++$index) {

      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$index];
      unset($segments[$index]);

      foreach (self::split($segment) as $char) {
        $chars[] = $char;

        if ($char === ' ') {
          $mask .= ' ';
        } else {
          $mask .= '?';
        }
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskPos = -1;
    $breakPos = -1;

    while (false !== ($breakPos = self::strpos($mask, '#', $breakPos + 1))) {

      // move every character in front of the current '#' marker into the result
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // an original break or a space at this position is dropped, the break is written instead
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left behind the last marker is appended unchanged
    return $wrapped . implode('', $chars);
  }
7171
7172
  /**
   * Gives access to the list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7181
7182
}
7183