Completed
Push — master ( d98de5...cd7fbc )
by Lars
02:41
created

UTF8::int_to_chr()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 0
cts 1
cp 0
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
93
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
94
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
96
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
98
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
99
  );
100
101
  /**
102
   * Numeric code point => UTF-8 Character
103
   *
104
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
105
   *
106
   * @var array
107
   */
108
  private static $whitespace = array(
109
    // NUL Byte
110
    0     => "\x0",
111
    // Tab
112
    9     => "\x9",
113
    // New Line
114
    10    => "\xa",
115
    // Vertical Tab
116
    11    => "\xb",
117
    // Carriage Return
118
    13    => "\xd",
119
    // Ordinary Space
120
    32    => "\x20",
121
    // NO-BREAK SPACE
122
    160   => "\xc2\xa0",
123
    // OGHAM SPACE MARK
124
    5760  => "\xe1\x9a\x80",
125
    // MONGOLIAN VOWEL SEPARATOR
126
    6158  => "\xe1\xa0\x8e",
127
    // EN QUAD
128
    8192  => "\xe2\x80\x80",
129
    // EM QUAD
130
    8193  => "\xe2\x80\x81",
131
    // EN SPACE
132
    8194  => "\xe2\x80\x82",
133
    // EM SPACE
134
    8195  => "\xe2\x80\x83",
135
    // THREE-PER-EM SPACE
136
    8196  => "\xe2\x80\x84",
137
    // FOUR-PER-EM SPACE
138
    8197  => "\xe2\x80\x85",
139
    // SIX-PER-EM SPACE
140
    8198  => "\xe2\x80\x86",
141
    // FIGURE SPACE
142
    8199  => "\xe2\x80\x87",
143
    // PUNCTUATION SPACE
144
    8200  => "\xe2\x80\x88",
145
    // THIN SPACE
146
    8201  => "\xe2\x80\x89",
147
    //HAIR SPACE
148
    8202  => "\xe2\x80\x8a",
149
    // LINE SEPARATOR
150
    8232  => "\xe2\x80\xa8",
151
    // PARAGRAPH SEPARATOR
152
    8233  => "\xe2\x80\xa9",
153
    // NARROW NO-BREAK SPACE
154
    8239  => "\xe2\x80\xaf",
155
    // MEDIUM MATHEMATICAL SPACE
156
    8287  => "\xe2\x81\x9f",
157
    // IDEOGRAPHIC SPACE
158
    12288 => "\xe3\x80\x80",
159
  );
160
161
  /**
162
   * @var array
163
   */
164
  private static $whitespaceTable = array(
165
      'SPACE'                     => "\x20",
166
      'NO-BREAK SPACE'            => "\xc2\xa0",
167
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
168
      'EN QUAD'                   => "\xe2\x80\x80",
169
      'EM QUAD'                   => "\xe2\x80\x81",
170
      'EN SPACE'                  => "\xe2\x80\x82",
171
      'EM SPACE'                  => "\xe2\x80\x83",
172
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
173
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
174
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
175
      'FIGURE SPACE'              => "\xe2\x80\x87",
176
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
177
      'THIN SPACE'                => "\xe2\x80\x89",
178
      'HAIR SPACE'                => "\xe2\x80\x8a",
179
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
180
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
181
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
182
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
183
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
184
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
185
  );
186
187
  /**
188
   * bidirectional text chars
189
   *
190
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
191
   *
192
   * @var array
193
   */
194
  private static $bidiUniCodeControlsTable = array(
195
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
196
    8234 => "\xE2\x80\xAA",
197
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
198
    8235 => "\xE2\x80\xAB",
199
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
200
    8236 => "\xE2\x80\xAC",
201
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
202
    8237 => "\xE2\x80\xAD",
203
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
204
    8238 => "\xE2\x80\xAE",
205
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
206
    8294 => "\xE2\x81\xA6",
207
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
208
    8295 => "\xE2\x81\xA7",
209
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
210
    8296 => "\xE2\x81\xA8",
211
    // POP DIRECTIONAL ISOLATE
212
    8297 => "\xE2\x81\xA9",
213
  );
214
215
  /**
216
   * @var array
217
   */
218
  private static $commonCaseFold = array(
219
      'ſ'            => 's',
220
      "\xCD\x85"     => 'ι',
221
      'ς'            => 'σ',
222
      "\xCF\x90"     => 'β',
223
      "\xCF\x91"     => 'θ',
224
      "\xCF\x95"     => 'φ',
225
      "\xCF\x96"     => 'π',
226
      "\xCF\xB0"     => 'κ',
227
      "\xCF\xB1"     => 'ρ',
228
      "\xCF\xB5"     => 'ε',
229
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
230
      "\xE1\xBE\xBE" => 'ι',
231
  );
232
233
  /**
234
   * @var array
235
   */
236
  private static $brokenUtf8ToUtf8 = array(
237
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
238
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
239
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
240
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
241
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
242
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
243
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
244
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
245
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
246
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
247
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
248
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
249
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
250
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
251
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
252
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
253
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
254
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
255
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
256
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
257
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
258
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
259
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
260
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
261
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
262
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
263
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
264
      'ü'       => 'ü',
265
      'ä'       => 'ä',
266
      'ö'       => 'ö',
267
      'Ö'       => 'Ö',
268
      'ß'       => 'ß',
269
      'Ã '       => 'à',
270
      'á'       => 'á',
271
      'â'       => 'â',
272
      'ã'       => 'ã',
273
      'ù'       => 'ù',
274
      'ú'       => 'ú',
275
      'û'       => 'û',
276
      'Ù'       => 'Ù',
277
      'Ú'       => 'Ú',
278
      'Û'       => 'Û',
279
      'Ü'       => 'Ü',
280
      'ò'       => 'ò',
281
      'ó'       => 'ó',
282
      'ô'       => 'ô',
283
      'è'       => 'è',
284
      'é'       => 'é',
285
      'ê'       => 'ê',
286
      'ë'       => 'ë',
287
      'À'       => 'À',
288
      'Á'       => 'Á',
289
      'Â'       => 'Â',
290
      'Ã'       => 'Ã',
291
      'Ä'       => 'Ä',
292
      'Ã…'       => 'Å',
293
      'Ç'       => 'Ç',
294
      'È'       => 'È',
295
      'É'       => 'É',
296
      'Ê'       => 'Ê',
297
      'Ë'       => 'Ë',
298
      'ÃŒ'       => 'Ì',
299
      'Í'       => 'Í',
300
      'ÃŽ'       => 'Î',
301
      'Ï'       => 'Ï',
302
      'Ñ'       => 'Ñ',
303
      'Ã’'       => 'Ò',
304
      'Ó'       => 'Ó',
305
      'Ô'       => 'Ô',
306
      'Õ'       => 'Õ',
307
      'Ø'       => 'Ø',
308
      'Ã¥'       => 'å',
309
      'æ'       => 'æ',
310
      'ç'       => 'ç',
311
      'ì'       => 'ì',
312
      'í'       => 'í',
313
      'î'       => 'î',
314
      'ï'       => 'ï',
315
      'ð'       => 'ð',
316
      'ñ'       => 'ñ',
317
      'õ'       => 'õ',
318
      'ø'       => 'ø',
319
      'ý'       => 'ý',
320
      'ÿ'       => 'ÿ',
321
      '€'      => '€',
322
      '’'      => '’',
323
  );
324
325
  /**
326
   * @var array
327
   */
328
  private static $utf8ToWin1252 = array(
329
      "\xe2\x82\xac" => "\x80", // EURO SIGN
330
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
331
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
332
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
333
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
334
      "\xe2\x80\xa0" => "\x86", // DAGGER
335
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
336
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
337
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
338
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
339
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
340
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
341
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
342
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
343
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
344
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
345
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
346
      "\xe2\x80\xa2" => "\x95", // BULLET
347
      "\xe2\x80\x93" => "\x96", // EN DASH
348
      "\xe2\x80\x94" => "\x97", // EM DASH
349
      "\xcb\x9c"     => "\x98", // SMALL TILDE
350
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
351
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
352
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
353
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
354
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
355
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
356
  );
357
358
  /**
359
   * @var array
360
   */
361
  private static $utf8MSWord = array(
362
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
363
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
364
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
365
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
366
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
367
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
368
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
369
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
370
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
371
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
372
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
373
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
374
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
375
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
376
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
377
  );
378
379
  /**
380
   * @var array
381
   */
382
  private static $iconvEncoding = array(
383
      'ANSI_X3.4-1968',
384
      'ANSI_X3.4-1986',
385
      'ASCII',
386
      'CP367',
387
      'IBM367',
388
      'ISO-IR-6',
389
      'ISO646-US',
390
      'ISO_646.IRV:1991',
391
      'US',
392
      'US-ASCII',
393
      'CSASCII',
394
      'UTF-8',
395
      'ISO-10646-UCS-2',
396
      'UCS-2',
397
      'CSUNICODE',
398
      'UCS-2BE',
399
      'UNICODE-1-1',
400
      'UNICODEBIG',
401
      'CSUNICODE11',
402
      'UCS-2LE',
403
      'UNICODELITTLE',
404
      'ISO-10646-UCS-4',
405
      'UCS-4',
406
      'CSUCS4',
407
      'UCS-4BE',
408
      'UCS-4LE',
409
      'UTF-16',
410
      'UTF-16BE',
411
      'UTF-16LE',
412
      'UTF-32',
413
      'UTF-32BE',
414
      'UTF-32LE',
415
      'UNICODE-1-1-UTF-7',
416
      'UTF-7',
417
      'CSUNICODE11UTF7',
418
      'UCS-2-INTERNAL',
419
      'UCS-2-SWAPPED',
420
      'UCS-4-INTERNAL',
421
      'UCS-4-SWAPPED',
422
      'C99',
423
      'JAVA',
424
      'CP819',
425
      'IBM819',
426
      'ISO-8859-1',
427
      'ISO-IR-100',
428
      'ISO8859-1',
429
      'ISO_8859-1',
430
      'ISO_8859-1:1987',
431
      'L1',
432
      'LATIN1',
433
      'CSISOLATIN1',
434
      'ISO-8859-2',
435
      'ISO-IR-101',
436
      'ISO8859-2',
437
      'ISO_8859-2',
438
      'ISO_8859-2:1987',
439
      'L2',
440
      'LATIN2',
441
      'CSISOLATIN2',
442
      'ISO-8859-3',
443
      'ISO-IR-109',
444
      'ISO8859-3',
445
      'ISO_8859-3',
446
      'ISO_8859-3:1988',
447
      'L3',
448
      'LATIN3',
449
      'CSISOLATIN3',
450
      'ISO-8859-4',
451
      'ISO-IR-110',
452
      'ISO8859-4',
453
      'ISO_8859-4',
454
      'ISO_8859-4:1988',
455
      'L4',
456
      'LATIN4',
457
      'CSISOLATIN4',
458
      'CYRILLIC',
459
      'ISO-8859-5',
460
      'ISO-IR-144',
461
      'ISO8859-5',
462
      'ISO_8859-5',
463
      'ISO_8859-5:1988',
464
      'CSISOLATINCYRILLIC',
465
      'ARABIC',
466
      'ASMO-708',
467
      'ECMA-114',
468
      'ISO-8859-6',
469
      'ISO-IR-127',
470
      'ISO8859-6',
471
      'ISO_8859-6',
472
      'ISO_8859-6:1987',
473
      'CSISOLATINARABIC',
474
      'ECMA-118',
475
      'ELOT_928',
476
      'GREEK',
477
      'GREEK8',
478
      'ISO-8859-7',
479
      'ISO-IR-126',
480
      'ISO8859-7',
481
      'ISO_8859-7',
482
      'ISO_8859-7:1987',
483
      'ISO_8859-7:2003',
484
      'CSISOLATINGREEK',
485
      'HEBREW',
486
      'ISO-8859-8',
487
      'ISO-IR-138',
488
      'ISO8859-8',
489
      'ISO_8859-8',
490
      'ISO_8859-8:1988',
491
      'CSISOLATINHEBREW',
492
      'ISO-8859-9',
493
      'ISO-IR-148',
494
      'ISO8859-9',
495
      'ISO_8859-9',
496
      'ISO_8859-9:1989',
497
      'L5',
498
      'LATIN5',
499
      'CSISOLATIN5',
500
      'ISO-8859-10',
501
      'ISO-IR-157',
502
      'ISO8859-10',
503
      'ISO_8859-10',
504
      'ISO_8859-10:1992',
505
      'L6',
506
      'LATIN6',
507
      'CSISOLATIN6',
508
      'ISO-8859-11',
509
      'ISO8859-11',
510
      'ISO_8859-11',
511
      'ISO-8859-13',
512
      'ISO-IR-179',
513
      'ISO8859-13',
514
      'ISO_8859-13',
515
      'L7',
516
      'LATIN7',
517
      'ISO-8859-14',
518
      'ISO-CELTIC',
519
      'ISO-IR-199',
520
      'ISO8859-14',
521
      'ISO_8859-14',
522
      'ISO_8859-14:1998',
523
      'L8',
524
      'LATIN8',
525
      'ISO-8859-15',
526
      'ISO-IR-203',
527
      'ISO8859-15',
528
      'ISO_8859-15',
529
      'ISO_8859-15:1998',
530
      'LATIN-9',
531
      'ISO-8859-16',
532
      'ISO-IR-226',
533
      'ISO8859-16',
534
      'ISO_8859-16',
535
      'ISO_8859-16:2001',
536
      'L10',
537
      'LATIN10',
538
      'KOI8-R',
539
      'CSKOI8R',
540
      'KOI8-U',
541
      'KOI8-RU',
542
      'CP1250',
543
      'MS-EE',
544
      'WINDOWS-1250',
545
      'CP1251',
546
      'MS-CYRL',
547
      'WINDOWS-1251',
548
      'CP1252',
549
      'MS-ANSI',
550
      'WINDOWS-1252',
551
      'CP1253',
552
      'MS-GREEK',
553
      'WINDOWS-1253',
554
      'CP1254',
555
      'MS-TURK',
556
      'WINDOWS-1254',
557
      'CP1255',
558
      'MS-HEBR',
559
      'WINDOWS-1255',
560
      'CP1256',
561
      'MS-ARAB',
562
      'WINDOWS-1256',
563
      'CP1257',
564
      'WINBALTRIM',
565
      'WINDOWS-1257',
566
      'CP1258',
567
      'WINDOWS-1258',
568
      '850',
569
      'CP850',
570
      'IBM850',
571
      'CSPC850MULTILINGUAL',
572
      '862',
573
      'CP862',
574
      'IBM862',
575
      'CSPC862LATINHEBREW',
576
      '866',
577
      'CP866',
578
      'IBM866',
579
      'CSIBM866',
580
      'MAC',
581
      'MACINTOSH',
582
      'MACROMAN',
583
      'CSMACINTOSH',
584
      'MACCENTRALEUROPE',
585
      'MACICELAND',
586
      'MACCROATIAN',
587
      'MACROMANIA',
588
      'MACCYRILLIC',
589
      'MACUKRAINE',
590
      'MACGREEK',
591
      'MACTURKISH',
592
      'MACHEBREW',
593
      'MACARABIC',
594
      'MACTHAI',
595
      'HP-ROMAN8',
596
      'R8',
597
      'ROMAN8',
598
      'CSHPROMAN8',
599
      'NEXTSTEP',
600
      'ARMSCII-8',
601
      'GEORGIAN-ACADEMY',
602
      'GEORGIAN-PS',
603
      'KOI8-T',
604
      'CP154',
605
      'CYRILLIC-ASIAN',
606
      'PT154',
607
      'PTCP154',
608
      'CSPTCP154',
609
      'KZ-1048',
610
      'RK1048',
611
      'STRK1048-2002',
612
      'CSKZ1048',
613
      'MULELAO-1',
614
      'CP1133',
615
      'IBM-CP1133',
616
      'ISO-IR-166',
617
      'TIS-620',
618
      'TIS620',
619
      'TIS620-0',
620
      'TIS620.2529-1',
621
      'TIS620.2533-0',
622
      'TIS620.2533-1',
623
      'CP874',
624
      'WINDOWS-874',
625
      'VISCII',
626
      'VISCII1.1-1',
627
      'CSVISCII',
628
      'TCVN',
629
      'TCVN-5712',
630
      'TCVN5712-1',
631
      'TCVN5712-1:1993',
632
      'ISO-IR-14',
633
      'ISO646-JP',
634
      'JIS_C6220-1969-RO',
635
      'JP',
636
      'CSISO14JISC6220RO',
637
      'JISX0201-1976',
638
      'JIS_X0201',
639
      'X0201',
640
      'CSHALFWIDTHKATAKANA',
641
      'ISO-IR-87',
642
      'JIS0208',
643
      'JIS_C6226-1983',
644
      'JIS_X0208',
645
      'JIS_X0208-1983',
646
      'JIS_X0208-1990',
647
      'X0208',
648
      'CSISO87JISX0208',
649
      'ISO-IR-159',
650
      'JIS_X0212',
651
      'JIS_X0212-1990',
652
      'JIS_X0212.1990-0',
653
      'X0212',
654
      'CSISO159JISX02121990',
655
      'CN',
656
      'GB_1988-80',
657
      'ISO-IR-57',
658
      'ISO646-CN',
659
      'CSISO57GB1988',
660
      'CHINESE',
661
      'GB_2312-80',
662
      'ISO-IR-58',
663
      'CSISO58GB231280',
664
      'CN-GB-ISOIR165',
665
      'ISO-IR-165',
666
      'ISO-IR-149',
667
      'KOREAN',
668
      'KSC_5601',
669
      'KS_C_5601-1987',
670
      'KS_C_5601-1989',
671
      'CSKSC56011987',
672
      'EUC-JP',
673
      'EUCJP',
674
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
675
      'CSEUCPKDFMTJAPANESE',
676
      'MS_KANJI',
677
      'SHIFT-JIS',
678
      'SHIFT_JIS',
679
      'SJIS',
680
      'CSSHIFTJIS',
681
      'CP932',
682
      'ISO-2022-JP',
683
      'CSISO2022JP',
684
      'ISO-2022-JP-1',
685
      'ISO-2022-JP-2',
686
      'CSISO2022JP2',
687
      'CN-GB',
688
      'EUC-CN',
689
      'EUCCN',
690
      'GB2312',
691
      'CSGB2312',
692
      'GBK',
693
      'CP936',
694
      'MS936',
695
      'WINDOWS-936',
696
      'GB18030',
697
      'ISO-2022-CN',
698
      'CSISO2022CN',
699
      'ISO-2022-CN-EXT',
700
      'HZ',
701
      'HZ-GB-2312',
702
      'EUC-TW',
703
      'EUCTW',
704
      'CSEUCTW',
705
      'BIG-5',
706
      'BIG-FIVE',
707
      'BIG5',
708
      'BIGFIVE',
709
      'CN-BIG5',
710
      'CSBIG5',
711
      'CP950',
712
      'BIG5-HKSCS:1999',
713
      'BIG5-HKSCS:2001',
714
      'BIG5-HKSCS',
715
      'BIG5-HKSCS:2004',
716
      'BIG5HKSCS',
717
      'EUC-KR',
718
      'EUCKR',
719
      'CSEUCKR',
720
      'CP949',
721
      'UHC',
722
      'CP1361',
723
      'JOHAB',
724
      'ISO-2022-KR',
725
      'CSISO2022KR',
726
      'CP856',
727
      'CP922',
728
      'CP943',
729
      'CP1046',
730
      'CP1124',
731
      'CP1129',
732
      'CP1161',
733
      'IBM-1161',
734
      'IBM1161',
735
      'CSIBM1161',
736
      'CP1162',
737
      'IBM-1162',
738
      'IBM1162',
739
      'CSIBM1162',
740
      'CP1163',
741
      'IBM-1163',
742
      'IBM1163',
743
      'CSIBM1163',
744
      'DEC-KANJI',
745
      'DEC-HANYU',
746
      '437',
747
      'CP437',
748
      'IBM437',
749
      'CSPC8CODEPAGE437',
750
      'CP737',
751
      'CP775',
752
      'IBM775',
753
      'CSPC775BALTIC',
754
      '852',
755
      'CP852',
756
      'IBM852',
757
      'CSPCP852',
758
      'CP853',
759
      '855',
760
      'CP855',
761
      'IBM855',
762
      'CSIBM855',
763
      '857',
764
      'CP857',
765
      'IBM857',
766
      'CSIBM857',
767
      'CP858',
768
      '860',
769
      'CP860',
770
      'IBM860',
771
      'CSIBM860',
772
      '861',
773
      'CP-IS',
774
      'CP861',
775
      'IBM861',
776
      'CSIBM861',
777
      '863',
778
      'CP863',
779
      'IBM863',
780
      'CSIBM863',
781
      'CP864',
782
      'IBM864',
783
      'CSIBM864',
784
      '865',
785
      'CP865',
786
      'IBM865',
787
      'CSIBM865',
788
      '869',
789
      'CP-GR',
790
      'CP869',
791
      'IBM869',
792
      'CSIBM869',
793
      'CP1125',
794
      'EUC-JISX0213',
795
      'SHIFT_JISX0213',
796
      'ISO-2022-JP-3',
797
      'BIG5-2003',
798
      'ISO-IR-230',
799
      'TDS565',
800
      'ATARI',
801
      'ATARIST',
802
      'RISCOS-LATIN1',
803
  );
804
805
  /**
806
   * @var array
807 1
   */
808
  private static $support = array();
809 1
810 1
  /**
   * Constructor.
   *
   * Creating an instance simply triggers the environment detection done by
   * UTF8::checkForSupport().
   */
  public function __construct()
  {
    // delegate to the (idempotent) feature detection
    self::checkForSupport();
  }
817
818
  /**
   * Fetch a single character from a UTF-8 string by position, comparable to
   * what $str[1] does for plain byte strings.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return; it is handed to UTF8::substr() as the offset.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string for empty input or a negative position.</p>
   */
  public static function access($str, $pos)
  {
    $str = (string)$str;
    $pos = (int)$pos;

    // nothing to pick from an empty string, and negative positions are rejected
    if ($str === '' || $pos < 0) {
      return '';
    }

    return self::substr($str, $pos, 1);
  }
841
842
  /**
   * Make sure a string starts with the UTF-8 BOM.
   *
   * INFO: When UTF8::string_has_bom() reports anything other than false,
   *       the input string is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // already marked -> hand the input back untouched
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
859
860 2
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are turned into hexadecimal with base_convert() and the result
   * is packed with pack('H*', ...).
   *
   * NOTE: the PHP manual warns that base_convert() may lose precision on large
   *       numbers, so long digit strings may not convert exactly.
   *
   * @param mixed $bin <p>Binary digits, e.g. "01100001".</p>
   *
   * @return string <p>The packed bytes, or an empty string when $bin has no offset 0.</p>
   */
  public static function binary_to_str($bin)
  {
    if (isset($bin[0])) {
      // base 2 -> base 16, then hex-string -> raw bytes
      $hex = base_convert($bin, 2, 16);

      return pack('H*', $hex);
    }

    return '';
  }
875
876 1
  /**
   * Get the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: UTF8::$bom lists further BOM values, e.g. for UTF-16 and UTF-32.
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
887
888 1
  /**
   * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>The string handed to UTF8::chr_map().</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
902
903
  /**
   * Detect which UTF-8 related features the server environment offers and
   * remember the results in UTF8::$support.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   *       Once the "already_checked_via_portable_utf8" flag is set, further calls do nothing.
   */
  public static function checkForSupport()
  {
    // detection already ran -> nothing to do
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$support['already_checked_via_portable_utf8'] = true;

    // mbstring: http://php.net/manual/en/book.mbstring.php
    self::$support['mbstring'] = self::mbstring_loaded();

    // iconv: http://php.net/manual/en/book.iconv.php
    self::$support['iconv'] = self::iconv_loaded();

    // intl: http://php.net/manual/en/book.intl.php
    self::$support['intl'] = self::intl_loaded();

    // IntlChar: http://php.net/manual/en/class.intlchar.php
    self::$support['intlChar'] = self::intlChar_loaded();

    // PCRE: http://php.net/manual/en/book.pcre.php
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
930 6
931
  /**
   * Build the character that belongs to a numeric code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For the default encoding the work is delegated to IntlChar::chr() when
   * UTF8::$support reports it as available. Otherwise the UTF-8 bytes are
   * assembled by hand, converted to the requested encoding if necessary and
   * memoized in a static cache.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // only real integers are accepted (numeric strings, floats, null, ... => null)
    if ((int)$code_point !== $code_point) {
      return null;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding === 'UTF-8') {
      // fast path: let "IntlChar" do the work, if it is available
      if (self::$support['intlChar'] === true) {
        return \IntlChar::chr($code_point);
      }
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // use static cache, if there is no support for "IntlChar"
    static $memo = array();
    $memoKey = $code_point . $encoding;
    if (isset($memo[$memoKey])) {
      return $memo[$memoKey];
    }

    // keep the value inside the range a four byte sequence can carry
    $cp = $code_point % 0x200000;

    if ($cp < 0x80) {
      // one byte: 0xxxxxxx
      $char = chr($cp);
    } elseif ($cp < 0x800) {
      // two bytes: 110xxxxx 10xxxxxx
      $char = chr(0xC0 | ($cp >> 6))
              . chr(0x80 | ($cp & 0x3F));
    } elseif ($cp < 0x10000) {
      // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $char = chr(0xE0 | ($cp >> 12))
              . chr(0x80 | (($cp >> 6) & 0x3F))
              . chr(0x80 | ($cp & 0x3F));
    } else {
      // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $char = chr(0xF0 | ($cp >> 18))
              . chr(0x80 | (($cp >> 12) & 0x3F))
              . chr(0x80 | (($cp >> 6) & 0x3F))
              . chr(0x80 | ($cp & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $char = \mb_convert_encoding($char, $encoding, 'UTF-8');
    }

    // add into static cache
    $memo[$memoKey] = $char;

    return $char;
  }
990
991
  /**
992
   * Applies callback to all characters of a string.
993
   *
994
   * @param string|array $callback <p>The callback function.</p>
995
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
996
   *
997
   * @return array <p>The outcome of callback.</p>
998 2
   */
999
  public static function chr_map($callback, $str)
  {
    // split the string into its characters and run the callback on each one
    return array_map($callback, self::split($str));
  }
1005
1006 2
  /**
1007
   * Generates an array of byte length of each character of a Unicode string.
1008
   *
1009 2
   * 1 byte => U+0000  - U+007F
1010
   * 2 byte => U+0080  - U+07FF
1011 2
   * 3 byte => U+0800  - U+FFFF
1012 2
   * 4 byte => U+10000 - U+10FFFF
1013 2
   *
1014
   * @param string $str <p>The original Unicode string.</p>
1015 1
   *
1016 1
   * @return array <p>An array of byte lengths of each character.</p>
1017 1
   */
1018
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // nothing to measure for an empty string
    if ($str === '') {
      return array();
    }

    // strlen() counts bytes, so each entry is the byte length of one character
    $chars = self::split($str);

    return array_map('strlen', $chars);
  }
1028 2
1029
  /**
1030
   * Get a decimal code representation of a specific character.
1031
   *
1032
   * @param string $char <p>The input character.</p>
1033
   *
1034
   * @return int
1035
   */
1036
  public static function chr_to_decimal($char)
  {
    // Decodes the first UTF-8 sequence of $char into its decimal code point.
    //
    // Bytes are read through isset() so that empty or truncated input no
    // longer triggers "Uninitialized string offset"; a missing byte is handed
    // to self::ord() as '' (the value an out-of-range string read evaluates
    // to), so results are unchanged apart from the removed notice/warning.
    $char = (string)$char;
    $code = self::ord(isset($char[0]) ? $char[0] : '');
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx: single byte, the value is the code point
      return $code;
    }

    // Inspect the lead byte to find the sequence length and strip its marker
    // bits. Lead bytes matching none of the patterns keep $bytes === 1 and are
    // returned as-is below.
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx: append the payload bits of each following byte
      $next = isset($char[$i - 1]) ? $char[$i - 1] : '';
      $code = ($code << 6) + (self::ord($next) & ~0x80);
    }

    return $code;
  }
1068
1069
  /**
1070
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
1071 44
   *
1072
   * @param string $char <p>The input character</p>
1073
   * @param string $pfix [optional]
1074
   *
1075
   * @return string <p>The code point encoded as U+xxxx</p>
1076
   */
1077
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    // the literal entity "&#0;" is treated like an empty input
    $input = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($input), $pfix);
  }
1085
1086 44
  /**
1087 44
   * alias for "UTF8::chr_to_decimal()"
1088
   *
1089 44
   * @see UTF8::chr_to_decimal()
1090 44
   *
1091
   * @param string $chr
1092 44
   *
1093 17
   * @return int
1094 17
   */
1095
  public static function chr_to_int($chr)
  {
    // pure alias: delegate to chr_to_decimal() and hand its result back untouched
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1099
1100 44
  /**
1101 5
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
1102 5
   *
1103
   * @param string $body     <p>The original string to be split.</p>
1104 44
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
1105
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
1106
   *
1107
   * @return string <p>The chunked string</p>
1108
   */
1109
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    // cut the string into pieces of $chunklen, then glue them back together
    // with $end between the pieces (nothing is appended after the last one)
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1113
1114 4
  /**
1115
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
1116 4
   *
1117
   * @param string $str                     <p>The string to be sanitized.</p>
1118 4
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
1119 1
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
1120
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
1121
   *                                        => "..."</p>
1122
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
1123 4
   *                                        $normalize_whitespace</p>
1124
   *
1125
   * @return string <p>Clean UTF-8 encoded string.</p>
1126
   */
1127
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences (kept via '$1');
    // groups 2 and 3 match stray bytes, which are replaced by nothing.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // always applied: strip invalid bytes, replacement characters and invisible chars
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark(
            preg_replace($pattern, '$1', $str),
            ''
        )
    );

    // optional steps, each one only on a strict boolean true
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1162
1163 1
  /**
1164 1
   * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
1165 1
   *
1166
   * @param string $str <p>The input string.</p>
1167 1
   *
1168 1
   * @return string
1169
   */
1170 5
  public static function cleanup($str)
  {
    $input = (string)$str;

    // empty input: nothing to clean
    if ($input === '') {
      return '';
    }

    // repair ISO <-> UTF-8 mix-ups first
    $fixed = self::fix_simple_utf8($input);

    // then run clean() with:
    //   remove_bom              = true
    //   normalize_whitespace    = true
    //   normalize_msword        = false
    //   keep_non_breaking_space = true
    // (clean() additionally drops non-UTF-8 bytes, the diamond question mark
    // and invisible characters such as "\0" on every call)
    $cleaned = self::clean($fixed, true, true, false, true);

    return (string)$cleaned;
  }
1190
1191
  /**
1192
   * Accepts a string or an array of strings and returns an array of Unicode code points.
1193
   *
1194 1
   * INFO: opposite to UTF8::string()
1195
   *
1196 1
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
1197 1
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
1198 1
   *                                    default, code points will be returned as integers.</p>
1199
   *
1200 1
   * @return array <p>The array of code points.</p>
1201
   */
1202
  public static function codepoints($arg, $u_style = false)
  {
    $class = '\\voku\\helper\\UTF8';

    // a plain string is first broken up into its characters
    $chars = is_string($arg) ? self::split($arg) : $arg;

    // character => integer code point
    $points = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    // integer code point => hex notation (int_to_hex() with its default prefix)
    return array_map(array($class, 'int_to_hex'), $points);
  }
1228
1229 11
  /**
1230
   * Returns count of characters used in a string.
1231
   *
1232
   * @param string $str       <p>The input string.</p>
1233 11
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
1234
   *
1235
   * @return array <p>An associative array of Character as keys and
1236 11
   *               their count as values.</p>
1237
   */
1238 1
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // one array entry per character (chunk length 1), then tally identical entries
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1242
1243 11
  /**
1244
   * Converts an int-value into a UTF-8 character.
1245
   *
1246 11
   * @param int $int
1247 1
   *
1248 1
   * @return string
1249 1
   */
1250 11
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4
    $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;

    // build the numeric entity for the value and let the decoder turn it into a character
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1260
1261
  /**
1262 2
   * Encode a string with a new charset-encoding.
1263 1
   *
1264
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
1265
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
1266 2
   *
1267 2
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
1268 2
   * @param string $str      <p>The input string</p>
1269
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
1270 2
   *                         /> otherwise we auto-detect the current string-encoding</p>
1271
   *
1272 2
   * @return string
1273 2
   */
1274
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do without both a string and a target encoding
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);

    // detection yielded nothing usable (loose check, as before) => leave the string alone
    if (!$detected) {
      return $str;
    }

    // not forced and the string is already detected as the target encoding
    if ($force !== true && $detected === $encoding) {
      return $str;
    }

    // target UTF-8: use the repairing converter when forced or when the
    // detected source is one of the encodings listed here
    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || in_array($detected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // target ISO-8859-1: same idea with the dedicated converter
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || in_array($detected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_iso8859($str);
    }

    // warn when mbstring is flagged as missing for any other target; the
    // conversion below is still attempted afterwards
    $unsupportedWithoutMbstring = $encoding !== 'UTF-8'
                                  && $encoding !== 'WINDOWS-1252'
                                  && self::$support['mbstring'] === false;
    if ($unsupportedWithoutMbstring) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);

    // a falsy conversion result falls back to the (string-cast) input
    return $converted ? $converted : $str;
  }
1351
1352
  /**
1353
   * Reads entire file into a string.
1354
   *
1355
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1356
   *
1357
   * @link http://php.net/manual/en/function.file-get-contents.php
1358
   *
1359
   * @param string        $filename      <p>
1360
   *                                     Name of the file to read.
1361
   *                                     </p>
1362 2
   * @param int|null      $flags         [optional] <p>
1363
   *                                     Prior to PHP 6, this parameter is called
1364
   *                                     use_include_path and is a bool.
1365 2
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1366 2
   *                                     to trigger include path
1367
   *                                     search.
1368 2
   *                                     </p>
1369 2
   *                                     <p>
1370
   *                                     The value of flags can be any combination of
1371
   *                                     the following flags (with some restrictions), joined with the
1372
   *                                     binary OR (|)
1373 2
   *                                     operator.
1374 2
   *                                     </p>
1375
   *                                     <p>
1376 2
   *                                     <table>
1377 2
   *                                     Available flags
1378
   *                                     <tr valign="top">
1379 2
   *                                     <td>Flag</td>
1380 1
   *                                     <td>Description</td>
1381 1
   *                                     </tr>
1382 2
   *                                     <tr valign="top">
1383
   *                                     <td>
1384
   *                                     FILE_USE_INCLUDE_PATH
1385
   *                                     </td>
1386 2
   *                                     <td>
1387 1
   *                                     Search for filename in the include directory.
1388
   *                                     See include_path for more
1389
   *                                     information.
1390 1
   *                                     </td>
1391 1
   *                                     </tr>
1392 1
   *                                     <tr valign="top">
1393 1
   *                                     <td>
1394
   *                                     FILE_TEXT
1395 1
   *                                     </td>
1396
   *                                     <td>
1397
   *                                     As of PHP 6, the default encoding of the read
1398
   *                                     data is UTF-8. You can specify a different encoding by creating a
1399
   *                                     custom context or by changing the default using
1400
   *                                     stream_default_encoding. This flag cannot be
1401
   *                                     used with FILE_BINARY.
1402
   *                                     </td>
1403
   *                                     </tr>
1404
   *                                     <tr valign="top">
1405 1
   *                                     <td>
1406
   *                                     FILE_BINARY
1407 1
   *                                     </td>
1408
   *                                     <td>
1409
   *                                     With this flag, the file is read in binary mode. This is the default
1410
   *                                     setting and cannot be used with FILE_TEXT.
1411
   *                                     </td>
1412
   *                                     </tr>
1413
   *                                     </table>
1414
   *                                     </p>
1415
   * @param resource|null $context       [optional] <p>
1416
   *                                     A valid context resource created with
1417
   *                                     stream_context_create. If you don't need to use a
1418
   *                                     custom context, you can skip this parameter by passing null.
1419 9
   *                                     </p>
1420
   * @param int|null      $offset        [optional] <p>
1421 9
   *                                     The offset where the reading starts.
1422 9
   *                                     </p>
1423 3
   * @param int|null      $maxlen        [optional] <p>
1424
   *                                     Maximum length of data read. The default is to read until end
1425 3
   *                                     of file is reached.
1426 3
   *                                     </p>
1427 3
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1428 9
   *
1429 2
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1430 2
   *                                     or pdf, because they used non default utf-8 chars</p>
1431 2
   *
1432 2
   * @return string|false <p>The function returns the read data or false on failure.</p>
1433 9
   */
1434
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // Reads a file via the native file_get_contents() and, when
    // $convertToUtf8 is true, converts the data to UTF-8 and cleans it up.
    // Returns false when the native read fails.

    // init
    $timeout = (int)$timeout;

    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and
    // rewrites the name (tags stripped, quotes encoded), so some legitimate
    // file names cannot be addressed - kept as-is here, confirm whether this
    // sanitizing is still wanted.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // apply the timeout through a default http stream context, unless the
    // caller supplied a context or the timeout is 0
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function declares these two parameters as non-nullable
    // (bool / int). Forwarding this wrapper's null defaults relied on
    // implicit coercion, which PHP 8.1 reports as deprecated; the explicit
    // casts give the same values (null => false / 0) without the notice.
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    // only forward $maxlen when it is a real integer
    if (is_int($maxlen) === true) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // non-forced encode: relies on the detected encoding of the data
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1469
1470
  /**
1471
   * Checks if a file starts with BOM (Byte Order Mark) character.
1472
   *
1473
   * @param string $file_path <p>Path to a valid file.</p>
1474
   *
1475
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1476
   */
1477
  public static function file_has_bom($file_path)
  {
    // read the raw file (native function, no UTF-8 conversion) and inspect it
    $contents = file_get_contents($file_path);

    return self::string_has_bom($contents);
  }
1481
1482
  /**
1483
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1484
   *
1485
   * @param mixed  $var
1486
   * @param int    $normalization_form
1487
   * @param string $leading_combining
1488
   *
1489
   * @return mixed
1490
   */
1491
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    // arrays: filter every element recursively
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // objects: filter every iterable property recursively
    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // every other non-string type passes through untouched
    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder, so that the isset($n[0]) check below passes
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // an unusable normalizer result falls back to re-encoding as UTF-8
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars
    // for NFC-safe concatenations.
    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);
    if ($startsWithCombiningMark) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
1543 1
1544 1
  /**
1545 1
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1546
   *
1547
   * Gets a specific external variable by name and optionally filters it
1548 1
   *
1549
   * @link  http://php.net/manual/en/function.filter-input.php
1550
   *
1551
   * @param int    $type          <p>
1552
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1553
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1554
   *                              <b>INPUT_ENV</b>.
1555
   *                              </p>
1556
   * @param string $variable_name <p>
1557
   *                              Name of a variable to get.
1558
   *                              </p>
1559 1
   * @param int    $filter        [optional] <p>
1560
   *                              The ID of the filter to apply. The
1561 1
   *                              manual page lists the available filters.
1562
   *                              </p>
1563
   * @param mixed  $options       [optional] <p>
1564
   *                              Associative array of options or bitwise disjunction of flags. If filter
1565
   *                              accepts options, flags can be provided in "flags" field of array.
1566
   *                              </p>
1567
   *
1568
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1569
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1570
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1571
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1572
   * @since 5.2.0
1573
   */
1574 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // forward $options to the native function only when the caller passed it
    $var = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1584 7
1585 2
  /**
1586
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1587
   *
1588 7
   * Gets external variables and optionally filters them
1589 1
   *
1590 1
   * @link  http://php.net/manual/en/function.filter-input-array.php
1591 1
   *
1592
   * @param int   $type       <p>
1593 7
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1594
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1595
   *                          <b>INPUT_ENV</b>.
1596
   *                          </p>
1597
   * @param mixed $definition [optional] <p>
1598
   *                          An array defining the arguments. A valid key is a string
1599
   *                          containing a variable name and a valid value is either a filter type, or an array
1600
   *                          optionally specifying the filter, flags and options. If the value is an
1601
   *                          array, valid keys are filter which specifies the
1602
   *                          filter type,
1603 1
   *                          flags which specifies any flags that apply to the
1604
   *                          filter, and options which specifies any options that
1605 1
   *                          apply to the filter. See the example below for a better understanding.
1606
   *                          </p>
1607 1
   *                          <p>
1608
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1609
   *                          input array are filtered by this filter.
1610 1
   *                          </p>
1611 1
   * @param bool  $add_empty  [optional] <p>
1612
   *                          Add missing keys as <b>NULL</b> to the return value.
1613 1
   *                          </p>
1614
   *
1615
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1616 1
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1617 1
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1618 1
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1619 1
   * fails.
1620 1
   * @since 5.2.0
1621
   */
1622 1 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with only $type given, the native function runs with its own defaults
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1632 1
1633
  /**
1634 1
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1635
   *
1636
   * Filters a variable with a specified filter
1637
   *
1638 1
   * @link  http://php.net/manual/en/function.filter-var.php
1639
   *
1640
   * @param mixed $variable <p>
1641
   *                        Value to filter.
1642
   *                        </p>
1643
   * @param int   $filter   [optional] <p>
1644
   *                        The ID of the filter to apply. The
1645
   *                        manual page lists the available filters.
1646
   *                        </p>
1647
   * @param mixed $options  [optional] <p>
1648
   *                        Associative array of options or bitwise disjunction of flags. If filter
1649
   *                        accepts options, flags can be provided in "flags" field of array. For
1650
   *                        the "callback" filter, callable type should be passed. The
1651
   *                        callback must accept one argument, the value to be filtered, and return
1652
   *                        the value after filtering/sanitizing it.
1653
   *                        </p>
1654 1
   *                        <p>
1655
   *                        <code>
1656 1
   *                        // for filters that accept options, use this format
1657 1
   *                        $options = array(
1658
   *                        'options' => array(
1659
   *                        'default' => 3, // value to return if the filter fails
1660 1
   *                        // other options here
1661
   *                        'min_range' => 0
1662 1
   *                        ),
1663 1
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1664 1
   *                        );
1665 1
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1666 1
   *                        // for filter that only accept flags, you can pass them directly
1667 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1668 1
   *                        // for filter that only accept flags, you can also pass as an array
1669 1
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1670 1
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1671 1
   *                        // callback validate filter
1672 1
   *                        function foo($value)
1673
   *                        {
1674
   *                        // Expected format: Surname, GivenNames
1675
   *                        if (strpos($value, ", ") === false) return false;
1676
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1677
   *                        $empty = (empty($surname) || empty($givennames));
1678
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1679
   *                        if ($empty || $notstrings) {
1680
   *                        return false;
1681
   *                        } else {
1682
   *                        return $value;
1683
   *                        }
1684
   *                        }
1685
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1686
   *                        </code>
1687
   *                        </p>
1688
   *
1689
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1690
   * @since 5.2.0
1691
   */
1692 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller really supplied a third argument,
    // otherwise the native filter_var() is called with its own default for it.
    $filtered = func_num_args() >= 3
        ? filter_var($variable, $filter, $options)
        : filter_var($variable, $filter);

    // Hand the native result to self::filter() before returning it.
    return self::filter($filtered);
  }
1702
1703
  /**
1704
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1705
   *
1706
   * Gets multiple variables and optionally filters them
1707
   *
1708
   * @link  http://php.net/manual/en/function.filter-var-array.php
1709
   *
1710
   * @param array $data       <p>
1711
   *                          An array with string keys containing the data to filter.
1712
   *                          </p>
1713
   * @param mixed $definition [optional] <p>
1714
   *                          An array defining the arguments. A valid key is a string
1715
   *                          containing a variable name and a valid value is either a
1716
   *                          filter type, or an
1717
   *                          array optionally specifying the filter, flags and options.
1718
   *                          If the value is an array, valid keys are filter
1719
   *                          which specifies the filter type,
1720
   *                          flags which specifies any flags that apply to the
1721
   *                          filter, and options which specifies any options that
1722
   *                          apply to the filter. See the example below for a better understanding.
1723
   *                          </p>
1724
   *                          <p>
1725
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1726
   *                          input array are filtered by this filter.
1727
   *                          </p>
1728
   * @param bool  $add_empty  [optional] <p>
1729
   *                          Add missing keys as <b>NULL</b> to the return value.
1730
   *                          </p>
1731
   *
1732
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1733
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1734
   * the variable is not set.
1735
   * @since 5.2.0
1736
   */
1737 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without a definition,
    // so its own defaults apply; otherwise all three arguments are forwarded.
    $filtered = func_num_args() >= 2
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    // Hand the native result to self::filter() before returning it.
    return self::filter($filtered);
  }
1747
1748
  /**
1749
   * Check if the number of unicode characters is not more than the specified integer.
1750
   *
1751
   * @param string $str      The original string to be checked.
1752 1
   * @param int    $box_size The size in number of chars to be checked against string.
1753
   *
1754 1
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1755 1
   */
1756
  public static function fits_inside($str, $box_size)
  {
    // length as reported by UTF8::strlen(), compared against the box size
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1760
1761
  /**
1762
   * Try to fix simple broken UTF-8 strings.
1763
   *
1764
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1765
   *
1766
   * If you received a UTF-8 string that was converted from Windows-1252 as if it was ISO-8859-1
1767
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1768
   * See: http://en.wikipedia.org/wiki/Windows-1252
1769
   *
1770
   * @param string $str <p>The input string</p>
1771
   *
1772 1
   * @return string
1773
   */
1774 1 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    // nothing to repair in an empty string
    if ($str === '') {
      return '';
    }

    // The search / replace lists are derived from self::$brokenUtf8ToUtf8 on the
    // first call only and are reused on every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
      $fixedSequences = array_values(self::$brokenUtf8ToUtf8);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1793 1
1794
  /**
1795
   * Fix a double (or multiple) encoded UTF8 string.
1796 1
   *
1797
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1798
   *
1799
   * @return mixed
1800
   */
1801
  public static function fix_utf8($str)
  {
    // arrays are processed element by element (recursively), keys are kept
    if (is_array($str)) {

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::fix_utf8($value);
      }

      return $str;
    }

    // an empty string is returned untouched, without a decode / encode round
    if ($str === '') {
      return $str;
    }

    // Repeat the decode -> encode round until the value no longer changes.
    do {
      $previous = $str;
      $str = self::to_utf8(
          self::utf8_decode($str)
      );
    } while ($previous !== $str);

    return $str;
  }
1825
1826 2
  /**
1827
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1828
   *
1829 2
   * @param string $char
1830
   *
1831 2
   * @return string <p>'RTL' or 'LTR'</p>
1832 2
   */
1833 1
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // numeric direction codes, taken from the "IntlChar"-Class
      $ltrCodes = array(0, 11, 12, 20);
      $rtlCodes = array(1, 13, 14, 15, 21);

      if (in_array($direction, $ltrCodes, true)) {
        return 'LTR';
      }

      if (in_array($direction, $rtlCodes, true)) {
        return 'RTL';
      }

      // any other code: fall through to the manual lookup below
    }

    $c = static::chr_to_decimal($char);

    // every code point listed below lies between 0x5be and 0x10b7f
    $isCandidate = (0x5be <= $c && 0x10b7f >= $c);
    if (!$isCandidate) {
      return 'LTR';
    }

    // Pick the lookup table for the part of the code space $c falls into:
    // $singles holds code points that must match exactly (strict comparison),
    // $ranges holds inclusive [first, last] pairs.
    if (0x85e >= $c) {

      $singles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $c) {

      // 0x200f is treated as 'RTL' on its own
      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $singles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two tables (and not 0x200f): nothing is listed as 'RTL'
      return 'LTR';

    }

    if (in_array($c, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1945 9
1946 9
  /**
1947 6
   * Get data from "/data/*.php"
1948
   *
1949
   * @param string $file
1950 9
   *
1951 2
   * @return bool|string|array|int <p>Will return false on error.</p>
1952 2
   */
1953
  private static function getData($file)
  {
    // build the full path of the data file that belongs to the given name
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    // the data file's own return value is handed back to the caller
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
1963 9
1964
  /**
1965 9
   * alias for "UTF8::string_has_bom()"
1966 9
   *
1967
   * @see UTF8::string_has_bom()
1968 7
   *
1969
   * @param string $str
1970 7
   *
1971 6
   * @return bool
1972
   *
1973 4
   * @deprecated
1974
   */
1975 9
  public static function hasBom($str)
  {
    // deprecated alias: the work is done by UTF8::string_has_bom()
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
1979
1980 9
  /**
1981 9
   * Converts a hexadecimal-value into an UTF-8 character.
1982 9
   *
1983
   * @param string $hexdec <p>The hexadecimal value.</p>
1984 9
   *
1985
   * @return string|false <p>One single UTF-8 character.</p>
1986 9
   */
1987
  public static function hex_to_chr($hexdec)
  {
    // hexadecimal string -> number, which UTF8::decimal_to_chr() then converts
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
1991
1992
  /**
1993
   * Converts hexadecimal U+xxxx code point representation to integer.
1994
   *
1995
   * INFO: opposite to UTF8::int_to_hex()
1996
   *
1997
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
1998
   *
1999
   * @return int|false <p>The code point, or false on failure.</p>
2000
   */
2001
  public static function hex_to_int($hexdec)
  {
    // falsy input ('', null, '0', ...) can never be a 4-6 digit code point
    if (!$hexdec) {
      return false;
    }

    // Optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
    // Only real hex digits are accepted: letters beyond "f" would otherwise be
    // silently cut off by intval(), e.g. "U+ZZZZ" => 0 or "12xy" => 18.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2013
2014
  /**
2015
   * alias for "UTF8::html_entity_decode()"
2016
   *
2017
   * @see UTF8::html_entity_decode()
2018
   *
2019
   * @param string $str
2020
   * @param int    $flags
2021
   * @param string $encoding
2022
   *
2023
   * @return string
2024
   */
2025
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // plain alias: all arguments are passed on unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2029
2030
  /**
2031
   * Converts a UTF-8 string to a series of HTML numbered entities.
2032
   *
2033
   * INFO: opposite to UTF8::html_decode()
2034
   *
2035
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2036
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2037
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2038
   *
2039
   * @return string <p>HTML numbered entities.</p>
2040
   */
2041
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded: 0x80 leaves the ASCII chars untouched
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // The convmap consists of groups of exactly four integers
      // (start code, end code, offset, mask). A stray fifth element is ignored
      // by older PHP versions but rejected with a ValueError by PHP 8.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // fallback without "mbstring": encode the string char by char
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2079
2080
  /**
2081
   * UTF-8 version of html_entity_decode()
2082
   *
2083
   * The reason we are not using html_entity_decode() by itself is because
2084
   * while it is not technically correct to leave out the semicolon
2085
   * at the end of an entity most browsers will still interpret the entity
2086
   * correctly. html_entity_decode() does not convert entities without
2087
   * semicolons, so we are left with our own little solution here. Bummer.
2088
   *
2089
   * Convert all HTML entities to their applicable characters
2090
   *
2091
   * INFO: opposite to UTF8::html_encode()
2092
   *
2093
   * @link http://php.net/manual/en/function.html-entity-decode.php
2094 2
   *
2095
   * @param string $str      <p>
2096 2
   *                         The input string.
2097 1
   *                         </p>
2098 1
   * @param int    $flags    [optional] <p>
2099
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2100 2
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2101
   *                         <table>
2102 2
   *                         Available <i>flags</i> constants
2103 1
   *                         <tr valign="top">
2104
   *                         <td>Constant Name</td>
2105
   *                         <td>Description</td>
2106 2
   *                         </tr>
2107 2
   *                         <tr valign="top">
2108 2
   *                         <td><b>ENT_COMPAT</b></td>
2109 2
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2110 2
   *                         </tr>
2111 1
   *                         <tr valign="top">
2112
   *                         <td><b>ENT_QUOTES</b></td>
2113 1
   *                         <td>Will convert both double and single quotes.</td>
2114 1
   *                         </tr>
2115 1
   *                         <tr valign="top">
2116 1
   *                         <td><b>ENT_NOQUOTES</b></td>
2117 1
   *                         <td>Will leave both double and single quotes unconverted.</td>
2118 2
   *                         </tr>
2119
   *                         <tr valign="top">
2120 2
   *                         <td><b>ENT_HTML401</b></td>
2121
   *                         <td>
2122
   *                         Handle code as HTML 4.01.
2123
   *                         </td>
2124
   *                         </tr>
2125
   *                         <tr valign="top">
2126
   *                         <td><b>ENT_XML1</b></td>
2127
   *                         <td>
2128
   *                         Handle code as XML 1.
2129
   *                         </td>
2130
   *                         </tr>
2131
   *                         <tr valign="top">
2132
   *                         <td><b>ENT_XHTML</b></td>
2133
   *                         <td>
2134
   *                         Handle code as XHTML.
2135
   *                         </td>
2136
   *                         </tr>
2137
   *                         <tr valign="top">
2138
   *                         <td><b>ENT_HTML5</b></td>
2139
   *                         <td>
2140
   *                         Handle code as HTML 5.
2141
   *                         </td>
2142
   *                         </tr>
2143
   *                         </table>
2144
   *                         </p>
2145
   * @param string $encoding [optional] <p>Encoding to use.</p>
2146
   *
2147
   * @return string <p>The decoded string.</p>
2148
   */
2149
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fewer than four bytes: returned as is (examples: "&;" or "&x;")
    if (!isset($str[3])) {
      return $str;
    }

    // cheap pre-check: without "&", or with neither "&#" nor ";", the string is returned as is
    $hasAmpersand = strpos($str, '&') !== false;
    $hasNumericPrefix = strpos($str, '&#') !== false;
    $hasSemicolon = strpos($str, ';') !== false;

    if (!$hasAmpersand || (!$hasNumericPrefix && !$hasSemicolon)) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    // default flags: ENT_HTML5 is only added when Bootup reports PHP >= 5.4
    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    // repeat both decoding steps until a pass leaves the string unchanged
    do {
      $before = $str;

      // decimal entities with a trailing semicolon; quotes are left encoded in this step
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($decoded === '"' || $decoded === "'") {
              return $matches[0];
            }

            return $decoded;
          },
          $str
      );

      // append the missing semicolon to numeric entities, then decode via the native function
      $withSemicolons = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );

      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2214
2215
  /**
2216
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2217
   *
2218
   * @link http://php.net/manual/en/function.htmlentities.php
2219
   *
2220
   * @param string $str           <p>
2221
   *                              The input string.
2222
   *                              </p>
2223
   * @param int    $flags         [optional] <p>
2224
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2225
   *                              invalid code unit sequences and the used document type. The default is
2226
   *                              ENT_COMPAT | ENT_HTML401.
2227
   *                              <table>
2228
   *                              Available <i>flags</i> constants
2229
   *                              <tr valign="top">
2230
   *                              <td>Constant Name</td>
2231
   *                              <td>Description</td>
2232 1
   *                              </tr>
2233
   *                              <tr valign="top">
2234 1
   *                              <td><b>ENT_COMPAT</b></td>
2235
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2236
   *                              </tr>
2237
   *                              <tr valign="top">
2238 1
   *                              <td><b>ENT_QUOTES</b></td>
2239
   *                              <td>Will convert both double and single quotes.</td>
2240
   *                              </tr>
2241
   *                              <tr valign="top">
2242
   *                              <td><b>ENT_NOQUOTES</b></td>
2243
   *                              <td>Will leave both double and single quotes unconverted.</td>
2244
   *                              </tr>
2245
   *                              <tr valign="top">
2246 1
   *                              <td><b>ENT_IGNORE</b></td>
2247
   *                              <td>
2248 1
   *                              Silently discard invalid code unit sequences instead of returning
2249
   *                              an empty string. Using this flag is discouraged as it
2250
   *                              may have security implications.
2251
   *                              </td>
2252
   *                              </tr>
2253
   *                              <tr valign="top">
2254
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2255
   *                              <td>
2256
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2257
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2258
   *                              </td>
2259
   *                              </tr>
2260
   *                              <tr valign="top">
2261 3
   *                              <td><b>ENT_DISALLOWED</b></td>
2262
   *                              <td>
2263 3
   *                              Replace invalid code points for the given document type with a
2264 3
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2265
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2266 3
   *                              instance, to ensure the well-formedness of XML documents with
2267
   *                              embedded external content.
2268 3
   *                              </td>
2269
   *                              </tr>
2270
   *                              <tr valign="top">
2271
   *                              <td><b>ENT_HTML401</b></td>
2272
   *                              <td>
2273
   *                              Handle code as HTML 4.01.
2274
   *                              </td>
2275
   *                              </tr>
2276
   *                              <tr valign="top">
2277
   *                              <td><b>ENT_XML1</b></td>
2278
   *                              <td>
2279 1
   *                              Handle code as XML 1.
2280
   *                              </td>
2281 1
   *                              </tr>
2282
   *                              <tr valign="top">
2283
   *                              <td><b>ENT_XHTML</b></td>
2284
   *                              <td>
2285
   *                              Handle code as XHTML.
2286
   *                              </td>
2287
   *                              </tr>
2288
   *                              <tr valign="top">
2289 2
   *                              <td><b>ENT_HTML5</b></td>
2290
   *                              <td>
2291 2
   *                              Handle code as HTML 5.
2292
   *                              </td>
2293
   *                              </tr>
2294
   *                              </table>
2295
   *                              </p>
2296
   * @param string $encoding      [optional] <p>
2297
   *                              Like <b>htmlspecialchars</b>,
2298
   *                              <b>htmlentities</b> takes an optional third argument
2299
   *                              <i>encoding</i> which defines encoding used in
2300
   *                              conversion.
2301
   *                              Although this argument is technically optional, you are highly
2302
   *                              encouraged to specify the correct value for your code.
2303 2
   *                              </p>
2304
   * @param bool   $double_encode [optional] <p>
2305 2
   *                              When <i>double_encode</i> is turned off PHP will not
2306
   *                              encode existing html entities. The default is to convert everything.
2307
   *                              </p>
2308
   *
2309
   *
2310
   * @return string the encoded string.
2311
   * </p>
2312
   * <p>
2313
   * If the input <i>string</i> contains an invalid code unit
2314
   * sequence within the given <i>encoding</i> an empty string
2315
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2316
   * <b>ENT_SUBSTITUTE</b> flags are set.
2317 1
   */
2318
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    // first step: the native function
    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // for every encoding other than UTF-8 the native result is final
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // second step: collect every char of three or more bytes (each one only once)
    // together with its numeric entity from UTF8::html_encode()
    $needles = array();
    $entities = array();

    foreach (self::chr_size_list($str) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($str, $position);

      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return str_replace($needles, $entities, $str);
  }
2346
2347
  /**
2348
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2349
   *
2350
   * INFO: Take a look at "UTF8::htmlentities()"
2351
   *
2352
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2353
   *
2354
   * @param string $str           <p>
2355
   *                              The string being converted.
2356
   *                              </p>
2357
   * @param int    $flags         [optional] <p>
2358
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2359 1
   *                              invalid code unit sequences and the used document type. The default is
2360
   *                              ENT_COMPAT | ENT_HTML401.
2361 1
   *                              <table>
2362
   *                              Available <i>flags</i> constants
2363
   *                              <tr valign="top">
2364
   *                              <td>Constant Name</td>
2365
   *                              <td>Description</td>
2366
   *                              </tr>
2367
   *                              <tr valign="top">
2368
   *                              <td><b>ENT_COMPAT</b></td>
2369
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2370
   *                              </tr>
2371
   *                              <tr valign="top">
2372
   *                              <td><b>ENT_QUOTES</b></td>
2373
   *                              <td>Will convert both double and single quotes.</td>
2374
   *                              </tr>
2375
   *                              <tr valign="top">
2376
   *                              <td><b>ENT_NOQUOTES</b></td>
2377
   *                              <td>Will leave both double and single quotes unconverted.</td>
2378
   *                              </tr>
2379
   *                              <tr valign="top">
2380
   *                              <td><b>ENT_IGNORE</b></td>
2381
   *                              <td>
2382
   *                              Silently discard invalid code unit sequences instead of returning
2383
   *                              an empty string. Using this flag is discouraged as it
2384
   *                              may have security implications.
2385
   *                              </td>
2386
   *                              </tr>
2387 1
   *                              <tr valign="top">
2388
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2389 1
   *                              <td>
2390
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2391
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2392
   *                              </td>
2393
   *                              </tr>
2394
   *                              <tr valign="top">
2395
   *                              <td><b>ENT_DISALLOWED</b></td>
2396
   *                              <td>
2397
   *                              Replace invalid code points for the given document type with a
2398
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2399
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2400
   *                              instance, to ensure the well-formedness of XML documents with
2401 1
   *                              embedded external content.
2402
   *                              </td>
2403 1
   *                              </tr>
2404
   *                              <tr valign="top">
2405
   *                              <td><b>ENT_HTML401</b></td>
2406
   *                              <td>
2407
   *                              Handle code as HTML 4.01.
2408
   *                              </td>
2409
   *                              </tr>
2410
   *                              <tr valign="top">
2411
   *                              <td><b>ENT_XML1</b></td>
2412
   *                              <td>
2413
   *                              Handle code as XML 1.
2414
   *                              </td>
2415
   *                              </tr>
2416 16
   *                              <tr valign="top">
2417
   *                              <td><b>ENT_XHTML</b></td>
2418 16
   *                              <td>
2419
   *                              Handle code as XHTML.
2420
   *                              </td>
2421
   *                              </tr>
2422
   *                              <tr valign="top">
2423
   *                              <td><b>ENT_HTML5</b></td>
2424
   *                              <td>
2425
   *                              Handle code as HTML 5.
2426
   *                              </td>
2427
   *                              </tr>
2428
   *                              </table>
2429
   *                              </p>
2430
   * @param string $encoding      [optional] <p>
2431 28
   *                              Defines encoding used in conversion.
2432
   *                              </p>
2433 28
   *                              <p>
2434
   *                              For the purposes of this function, the encodings
2435 28
   *                              ISO-8859-1, ISO-8859-15,
2436 5
   *                              UTF-8, cp866,
2437
   *                              cp1251, cp1252, and
2438
   *                              KOI8-R are effectively equivalent, provided the
2439 28
   *                              <i>string</i> itself is valid for the encoding, as
2440
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2441
   *                              the same positions in all of these encodings.
2442
   *                              </p>
2443
   * @param bool   $double_encode [optional] <p>
2444
   *                              When <i>double_encode</i> is turned off PHP will not
2445
   *                              encode existing html entities, the default is to convert everything.
2446
   *                              </p>
2447
   *
2448
   * @return string The converted string.
2449 1
   * </p>
2450
   * <p>
2451 1
   * If the input <i>string</i> contains an invalid code unit
2452
   * sequence within the given <i>encoding</i> an empty string
2453 1
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2454 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
2455
   */
2456
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The default encoding is used as-is; any other name is normalized first.
    $targetEncoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    // Delegate the actual conversion to PHP's native htmlspecialchars().
    return htmlspecialchars($str, $flags, $targetEncoding, $double_encode);
  }
2464
2465
  /**
2466
   * Checks whether iconv is available on the server.
2467
   *
2468
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2469
   */
2470
  public static function iconv_loaded()
  {
    // extension_loaded() already returns a boolean.
    $return = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only touch the iconv settings when the extension is really there;
    // otherwise iconv_set_encoding() would be an undefined function.
    if ($return === true && !Bootup::is_php('5.6')) {
      // INFO: "iconv_set_encoding" is deprecated since PHP 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $return;
  }
2485 15
2486
  /**
2487
   * alias for "UTF8::decimal_to_chr()"
2488
   *
2489
   * @see UTF8::decimal_to_chr()
2490
   *
2491
   * @param int $int
2492
   *
2493
   * @return string
2494
   */
2495
  public static function int_to_chr($int)
  {
    // Pure alias: the conversion itself lives in UTF8::decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2499
2500
  /**
2501
   * Converts Integer to hexadecimal U+xxxx code point representation.
2502
   *
2503
   * INFO: opposite to UTF8::hex_to_int()
2504
   *
2505
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2506
   * @param string $pfix [optional]
2507
   *
2508
   * @return string <p>The code point, or empty string on failure.</p>
2509
   */
2510
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a plain sequence of decimal digits (negative
    // numbers, empty strings, text, ...) yields an empty string.
    if (!ctype_digit((string)$int)) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits; longer values are kept as they are.
    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
2522
2523
  /**
2524
   * Checks whether intl-char is available on the server.
2525
   *
2526
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2527
   */
2528
  public static function intlChar_loaded()
  {
    // The version check comes first, exactly as before; the class lookup
    // only happens when Bootup::is_php('7.0') reports true.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2532
2533
  /**
2534
   * Checks whether intl is available on the server.
2535
   *
2536 1
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2537
   */
2538 1
  public static function intl_loaded()
  {
    // extension_loaded() returns a boolean; the strict comparison keeps the
    // result a plain true/false.
    $loaded = extension_loaded('intl');

    return $loaded === true;
  }
2542
2543
  /**
2544
   * alias for "UTF8::is_ascii()"
2545 1
   *
2546
   * @see UTF8::is_ascii()
2547 1
   *
2548
   * @param string $str
2549 1
   *
2550 1
   * @return boolean
2551
   *
2552 1
   * @deprecated
2553
   */
2554
  public static function isAscii($str)
  {
    // Deprecated camelCase alias: forwards unchanged to UTF8::is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2558
2559
  /**
2560
   * alias for "UTF8::is_base64()"
2561
   *
2562
   * @see UTF8::is_base64()
2563 1
   *
2564
   * @param string $str
2565 1
   *
2566
   * @return bool
2567 1
   *
2568
   * @deprecated
2569
   */
2570
  public static function isBase64($str)
  {
    // Deprecated camelCase alias: forwards unchanged to UTF8::is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2574 1
2575 1
  /**
2576 1
   * alias for "UTF8::is_binary()"
2577
   *
2578 1
   * @see UTF8::is_binary()
2579
   *
2580
   * @param string $str
2581
   *
2582
   * @return bool
2583
   *
2584
   * @deprecated
2585
   */
2586
  public static function isBinary($str)
  {
    // Deprecated camelCase alias: forwards unchanged to UTF8::is_binary().
    $result = self::is_binary($str);

    return $result;
  }
2590
2591
  /**
2592
   * alias for "UTF8::is_bom()"
2593 4
   *
2594
   * @see UTF8::is_bom()
2595 4
   *
2596
   * @param string $utf8_chr
2597 4
   *
2598
   * @return boolean
2599 4
   *
2600 4
   * @deprecated
2601 4
   */
2602 4
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias: forwards unchanged to UTF8::is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2606 4
2607 4
  /**
2608 2
   * alias for "UTF8::is_html()"
2609 2
   *
2610 4
   * @see UTF8::is_html()
2611 4
   *
2612 4
   * @param string $str
2613
   *
2614 4
   * @return boolean
2615 4
   *
2616 4
   * @deprecated
2617 4
   */
2618 4
  public static function isHtml($str)
  {
    // Deprecated camelCase alias: forwards unchanged to UTF8::is_html().
    $result = self::is_html($str);

    return $result;
  }
2622 4
2623 3
  /**
2624 3
   * alias for "UTF8::is_json()"
2625 4
   *
2626 4
   * @see UTF8::is_json()
2627 4
   *
2628
   * @param string $str
2629 4
   *
2630 3
   * @return bool
2631 2
   *
2632
   * @deprecated
2633 3
   */
2634
  public static function isJson($str)
  {
    // Deprecated camelCase alias: forwards unchanged to UTF8::is_json().
    $result = self::is_json($str);

    return $result;
  }
2638
2639 3
  /**
2640
   * alias for "UTF8::is_utf16()"
2641
   *
2642
   * @see UTF8::is_utf16()
2643
   *
2644
   * @param string $str
2645
   *
2646
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2647
   *
2648
   * @deprecated
2649
   */
2650
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias: forwards unchanged to UTF8::is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2654
2655 3
  /**
2656
   * alias for "UTF8::is_utf32()"
2657 3
   *
2658
   * @see UTF8::is_utf32()
2659 3
   *
2660 3
   * @param string $str
2661 3
   *
2662 3
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2663 3
   *
2664 3
   * @deprecated
2665 3
   */
2666 3
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias: forwards unchanged to UTF8::is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2670 3
2671 3
  /**
2672 3
   * alias for "UTF8::is_utf8()"
2673
   *
2674 3
   * @see UTF8::is_utf8()
2675 3
   *
2676 3
   * @param string $str
2677 3
   * @param bool   $strict
2678 3
   *
2679 3
   * @return bool
2680 3
   *
2681 3
   * @deprecated
2682 3
   */
2683 1
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias: forwards both arguments unchanged to UTF8::is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2687 3
2688
  /**
2689 3
   * Checks if a string is 7 bit ASCII.
2690 1
   *
2691 1
   * @param string $str <p>The string to check.</p>
2692
   *
2693 1
   * @return bool <p>
2694
   *              <strong>true</strong> if it is ASCII<br />
2695
   *              <strong>false</strong> otherwise
2696
   *              </p>
2697 3
   */
2698
  public static function is_ascii($str)
  {
    $text = (string)$str;

    // An empty string contains no non-ASCII byte, so it counts as ASCII.
    if ($text === '') {
      return true;
    }

    // ASCII means: no byte with the high bit set. Anything other than a
    // successful match (no match, or a PCRE error) is reported as ASCII.
    return preg_match('/[\x80-\xFF]/', $text) !== 1;
  }
2708
2709
  /**
2710
   * Returns true if the string is base64 encoded, false otherwise.
2711
   *
2712 43
   * @param string $str <p>The input string.</p>
2713
   *
2714 43
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2715
   */
2716 43
  public static function is_base64($str)
  {
    $candidate = (string)$str;

    // An empty string is never treated as base64.
    if ($candidate === '') {
      return false;
    }

    // Round trip: strictly decode, encode again, and require the exact
    // original text back.
    return base64_encode(base64_decode($candidate, true)) === $candidate;
  }
2730 41
2731
  /**
2732
   * Check if the input is binary... (is look like a hack).
2733
   *
2734
   * @param mixed $input
2735
   *
2736
   * @return bool
2737
   */
2738
  public static function is_binary($input)
  {
    $input = (string)$input;

    // Nothing to inspect: treat empty input as "not binary". This also
    // prevents a division by zero in the NUL-byte ratio below.
    $testLength = strlen($input);
    if ($testLength === 0) {
      return false;
    }

    // A string made up only of the characters "0" and "1" is treated as binary.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Heuristic: more than 30% NUL bytes is treated as binary data.
    if (substr_count($input, "\x0") / $testLength > 0.3) {
      return true;
    }

    return false;
  }
2751
2752 41
  /**
2753
   * Check if the file is binary.
2754 36
   *
2755 41
   * @param string $file
2756
   *
2757 34
   * @return boolean
2758 34
   */
2759 34
  public static function is_binary_file($file)
  {
    $block = '';

    // NOTE(review): fopen()/fread() report failure through their return
    // value, not through exceptions. The try/catch is kept on the assumption
    // that a custom error handler may turn warnings into exceptions - confirm.
    try {
      $fp = fopen($file, 'rb');

      // fopen() returns false when the file cannot be opened; reading from
      // or closing that value would be an error.
      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $data = fread($fp, 512);
        fclose($fp);

        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      $block = '';
    }

    // An unreadable or empty file cannot be classified as binary.
    if ($block === '') {
      return false;
    }

    return self::is_binary($block);
  }
2771 9
2772 9
  /**
2773 16
   * Checks if the given string is equal to any "Byte Order Mark".
2774
   *
2775
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2776
   *
2777
   * @param string $str <p>The input string.</p>
2778
   *
2779
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2780
   */
2781
  public static function is_bom($str)
  {
    // The known BOM byte sequences are the keys of self::$bom; the input must
    // be strictly identical to one of them.
    return in_array($str, array_keys(self::$bom), true);
  }
2791 3
2792 3
  /**
2793
   * Check if the string contains any html-tags <lall>.
2794
   *
2795
   * @param string $str <p>The input string.</p>
2796 5
   *
2797
   * @return boolean
2798 41
   */
2799
  public static function is_html($str)
  {
    $text = (string)$str;

    // An empty string cannot contain a tag.
    if ($text === '') {
      return false;
    }

    // Opening, closing or self-closing tag, optionally with attributes.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // Only a successful match counts; "no match" and PCRE errors both mean false.
    return preg_match($tagPattern, $text) === 1;
  }
2818 33
2819 33
  /**
2820 33
   * Try to check if "$str" is an json-string.
2821
   *
2822 33
   * @param string $str <p>The input string.</p>
2823
   *
2824 33
   * @return bool
2825 33
   */
2826 5
  public static function is_json($str)
  {
    $str = (string)$str;

    // An empty string is not JSON.
    if (!isset($str[0])) {
      return false;
    }

    $decoded = self::json_decode($str);

    // A JSON document is accepted when it decodes without error to an object
    // or to an array (e.g. "[1,2]"). Bare scalars such as "123" are still
    // rejected, as they were before.
    return (is_object($decoded) || is_array($decoded))
           &&
           json_last_error() === JSON_ERROR_NONE;
  }
2844
2845
  /**
2846
   * Check if the string is UTF-16.
2847
   *
2848
   * @param string $str <p>The input string.</p>
2849
   *
2850
   * @return int|false <p>
2851
   *                   <strong>false</strong> if it's not UTF-16,<br />
2852
   *                   <strong>1</strong> for UTF-16LE,<br />
2853
   *                   <strong>2</strong> for UTF-16BE.
2854
   *                   </p>
2855
   */
2856 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that self::is_binary() classifies as binary is examined.
    if (!self::is_binary($str)) {
      return false;
    }

    // Score each byte order: little endian first, then big endian.
    $scores = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // The conversion must survive a full round trip unchanged.
      $backAgain = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): in_array() searches the *values* of $strChars with the
      // *keys* of the second count_chars() result - confirm against
      // self::count_chars() that this is the intended comparison.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $chr => $unused) {
        if (in_array($chr, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0 : 0) means the byte order could not be determined.
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    // 1 = UTF-16LE, 2 = UTF-16BE
    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
2904
2905
  /**
2906
   * Check if the string is UTF-32.
2907
   *
2908
   * @param string $str
2909
   *
2910
   * @return int|false <p>
2911
   *                   <strong>false</strong> if it's not UTF-32,<br />
2912
   *                   <strong>1</strong> for UTF-32LE,<br />
2913
   *                   <strong>2</strong> for UTF-32BE.
2914
   *                   </p>
2915
   */
2916 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that self::is_binary() classifies as binary is examined.
    if (!self::is_binary($str)) {
      return false;
    }

    // Score each byte order: little endian first, then big endian.
    $scores = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // The conversion must survive a full round trip unchanged.
      $backAgain = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): in_array() searches the *values* of $strChars with the
      // *keys* of the second count_chars() result - confirm against
      // self::count_chars() that this is the intended comparison.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $chr => $unused) {
        if (in_array($chr, $strChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0 : 0) means the byte order could not be determined.
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    // 1 = UTF-32LE, 2 = UTF-32BE
    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
2964
2965 24
  /**
2966
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
2967 24
   *
2968
   * @see    http://hsivonen.iki.fi/php-utf8/
2969 24
   *
2970 2
   * @param string $str    <p>The string to be checked.</p>
2971
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
2972
   *
2973
   * @return bool
2974 23
   */
2975 2
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // An empty string is trivially valid.
    if (!isset($str[0])) {
      return true;
    }

    // In strict mode, anything detected as UTF-16 or UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): the regex shortcut relies on the /u modifier but is taken
    // when pcre_utf8_support() is *not* true - this looks inverted; confirm
    // against self::pcre_utf8_support() before changing it.
    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current character
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets announced by the current lead octet
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the lead octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence: only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Merge the 6 payload bits of the continuation octet into the code point.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the multi-octet sequence: check for illegal sequences and
        // code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // reset the state for the next character
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A string that ends in the middle of a multi-octet sequence (e.g. a lone
    // lead octet as last byte) is not valid UTF-8.
    return $mState === 0;
  }
3109 4
3110
  /**
3111 4
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3112 4
   * Decodes a JSON string
3113 4
   *
3114
   * @link http://php.net/manual/en/function.json-decode.php
3115 4
   *
3116
   * @param string $json    <p>
3117 4
   *                        The <i>json</i> string being decoded.
3118
   *                        </p>
3119
   *                        <p>
3120
   *                        This function only works with UTF-8 encoded strings.
3121
   *                        </p>
3122
   *                        <p>PHP implements a superset of
3123
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3124
   *                        only supports these values when they are nested inside an array or an object.
3125
   *                        </p>
3126
   * @param bool   $assoc   [optional] <p>
3127 13
   *                        When <b>TRUE</b>, returned objects will be converted into
3128
   *                        associative arrays.
3129 13
   *                        </p>
3130 13
   * @param int    $depth   [optional] <p>
3131
   *                        User specified recursion depth.
3132 13
   *                        </p>
3133 1
   * @param int    $options [optional] <p>
3134 1
   *                        Bitmask of JSON decode options. Currently only
3135 1
   *                        <b>JSON_BIGINT_AS_STRING</b>
3136
   *                        is supported (default is to cast large integers as floats)
3137 13
   *                        </p>
3138
   *
3139
   * @return mixed the value encoded in <i>json</i> in appropriate
3140
   * PHP type. Values true, false and
3141
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3142
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3143
   * <i>json</i> cannot be decoded or if the encoded
3144
   * data is deeper than the recursion limit.
3145
   */
3146
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // Run the raw input through self::filter() before it is decoded.
    $filtered = self::filter($json);

    // "$options" is only handed to json_decode() when Bootup::is_php('5.4')
    // reports true; otherwise the three-argument form is used.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3158
3159 2
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through self::filter() first and is then handed to
   * the native json_encode().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask of the native JSON encode constants, e.g.
   *                       <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only passed on when Bootup reports PHP 5.5 (or newer).
   *                       </p>
   *
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // Older PHP versions get the two-argument call, without "$depth".
    if (!Bootup::is_php('5.5')) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3207
3208
  /**
   * Lower-cases the first character of a string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst($str)
  {
    // first character, lower-cased via the UTF-8 aware helpers
    $head = self::strtolower(self::substr($str, 0, 1));

    // everything after the first character stays untouched
    $tail = self::substr($str, 1);

    return $head . $tail;
  }
3219
3220
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Characters to be stripped. If omitted (or empty), all leading
   *                      characters matching the Unicode classes \pZ and \pC are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // The string "0" is a valid character to strip, but it is falsy in PHP,
    // so it must be excluded from the "no characters given" check.
    $useDefaultChars = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3243
3244
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point,
   *                or an empty string if no code points were found.</p>
   */
  public static function max($arg)
  {
    // an array of strings is treated as one concatenated string
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // max() must not be called with an empty array: it raises a warning on
    // older PHP versions and throws a ValueError on PHP 8.
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3259
3260
  /**
   * Determines the largest byte length used by any single
   * UTF-8 encoded character of the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if there are none.</p>
   */
  public static function max_chr_width($str)
  {
    $byteLengths = self::chr_size_list($str);

    return count($byteLengths) > 0 ? (int)max($byteLengths) : 0;
  }
3277
3278
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to "UTF-8".
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3293
3294
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point,
   *                or an empty string if no code points were found.</p>
   */
  public static function min($arg)
  {
    // an array of strings is treated as one concatenated string
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // min() must not be called with an empty array: it raises a warning on
    // older PHP versions and throws a ValueError on PHP 8.
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3309
3310
  /**
   * Deprecated camelCase alias, forwards to "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   *
   * @return string
   *
   * @deprecated
   */
  public static function normalizeEncoding($encoding)
  {
    $normalized = self::normalize_encoding($encoding);

    return $normalized;
  }
3325 1
3326
  /**
   * Normalize the encoding-"name" input.
   *
   * Names are upper-cased and a set of well-known aliases is mapped to a canonical name;
   * results are memoized per input value for the lifetime of the request.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.,
   *                      or <strong>false</strong> if the given encoding is empty.</p>
   */
  public static function normalize_encoding($encoding)
  {
    static $normalizedNames = array();

    if (!$encoding) {
      return false;
    }

    // fast paths: already canonical, or a name known from the iconv list
    if ('UTF-8' === $encoding || in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($normalizedNames[$encoding])) {
      return $normalizedNames[$encoding];
    }

    $upperName = strtoupper($encoding);

    // lookup key: the upper-cased name without punctuation, e.g. "ISO-8859-1" => "ISO88591"
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperName);

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // unknown names are returned upper-cased, but otherwise unchanged
    $normalized = !empty($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upperName;

    $normalizedNames[$encoding] = $normalized;

    return $normalized;
  }
3382 5
3383 10
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of the "self::$utf8MSWord" table that occurs in the string
   * is replaced by the corresponding table value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists are built once and then re-used
    static $searchList = null;
    static $replaceList = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($searchList === null) {
      $searchList = array_keys(self::$utf8MSWord);
      $replaceList = array_values(self::$utf8MSWord);
    }

    return str_replace($searchList, $replaceList, $str);
  }
3409
3410
  /**
   * Normalize the whitespace.
   *
   * Every entry of the "self::$whitespaceTable" is replaced by a plain space and,
   * unless requested otherwise, every entry of the "self::$bidiUniCodeControlsTable" is removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    static $WHITESPACE_CACHE = array();
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Only a strict boolean "true" keeps the non-breaking space. The cache key is derived
    // from that very same comparison, so a truthy non-boolean value (e.g. 1) can no longer
    // store the unfiltered table in the cache slot that is used for "true".
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $whitespaceTable = self::$whitespaceTable;

      if ($keepNbsp) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3456
3457 45
  /**
   * Format a number with grouped thousands.
   *
   * The native number_format() only uses the first byte of each separator before PHP 5.4,
   * so multi-byte separators are inserted manually on those versions.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // a separator is "multi-byte" as soon as it has a second byte
    $hasMultiByteSeparator = isset($thousands_sep[1]) || isset($dec_point[1]);

    if (
        $hasMultiByteSeparator
        &&
        Bootup::is_php('5.4') !== true
    ) {
      // Format with the plain ASCII separators first and swap them afterwards.
      // strtr() replaces both in a single pass, so a "," inside of $dec_point
      // is never replaced again by $thousands_sep.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3494
3495
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.<p/>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // static cache, only consulted if "IntlChar" did not deliver a result
    static $codePointCache = array();

    // empty input has no code point, but the string "0" is a real character
    if (!$chr && $chr !== '0') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
      $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $intlCodePoint = \IntlChar::ord($chr);
      // a falsy result (null / 0) falls through to the manual decoding below
      if ($intlCodePoint) {
        return $intlCodePoint;
      }
    }

    if (isset($codePointCache[$chr]) === true) {
      return $codePointCache[$chr];
    }

    // at most four bytes are looked at; unpack('C*') yields a 1-based array of byte values
    $bytes = unpack('C*', substr($chr, 0, 4));
    $leadByte = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $leadByte && isset($bytes[4])) {
      // four byte sequence
      $codePoint = (($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $leadByte && isset($bytes[3])) {
      // three byte sequence
      $codePoint = (($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $leadByte && isset($bytes[2])) {
      // two byte sequence
      $codePoint = (($leadByte - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing to decode at all)
      $codePoint = $leadByte;
    }

    $codePointCache[$chr] = $codePoint;

    return $codePoint;
  }
3555
3556
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never sets variables in the current scope;
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    $parsed = \mb_parse_str($input, $result);

    // success requires both: no parser failure and a non-empty result
    return $parsed !== false && !empty($result);
  }
3583
3584
  /**
   * Checks if the "u" modifier, which enables Unicode support in PCRE, is available.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // The empty pattern matches the empty subject (result 1) when the modifier is supported;
    // the error that is raised otherwise is silenced on purpose.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matchResult = @preg_match('//u', '');

    return (bool)$matchResult;
  }
3594
3595 36
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits => code point,
   * hexadecimal digits => hexadecimal code point, anything else => character.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, or an empty array if a boundary is empty / resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve the first boundary to a code point
    if (ctype_digit((string)$var1)) {
      $firstCodePoint = (int)$var1;
    } elseif (ctype_xdigit($var1)) {
      $firstCodePoint = (int)self::hex_to_int($var1);
    } else {
      $firstCodePoint = self::ord($var1);
    }

    if (!$firstCodePoint) {
      return array();
    }

    // resolve the second boundary to a code point
    if (ctype_digit((string)$var2)) {
      $lastCodePoint = (int)$var2;
    } elseif (ctype_xdigit($var2)) {
      $lastCodePoint = (int)self::hex_to_int($var2);
    } else {
      $lastCodePoint = self::ord($var2);
    }

    if (!$lastCodePoint) {
      return array();
    }

    $chars = array();
    foreach (range($firstCodePoint, $lastCodePoint) as $codePoint) {
      $chars[] = self::chr($codePoint);
    }

    return $chars;
  }
3641
3642
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html entities,
    // which are then decoded in the loop below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    // "ENT_HTML5" is only touched when Bootup reports PHP 5.4 (or newer)
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // decode until nothing changes anymore (or only once, if multi-decoding is disabled)
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3692 12
3693 3
  /**
   * Deprecated camelCase alias, forwards to "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3708
3709
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the "self::$bom" table (BOM string => BOM byte length)
   * is checked against the beginning of the string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // Before PHP 8, substr() returns false (instead of '') if nothing is left
        // after the BOM; the cast keeps the documented string return type.
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3726 6
3727 6
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of a search string is collapsed
   * into a single occurrence, e.g. "a   b" with " " => "a b".
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    // any other type than string / array leaves the input untouched
    if (is_array($what)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // The run is replaced by its captured group ("$1") and not by $item itself:
        // the replacement argument of preg_replace() interprets sequences like "$0" or "\1"
        // as back-references, which would corrupt search strings containing them.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3750
3751
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str
   * @param bool   $url_encoded [optional] <p>Also remove the url-encoded form ("%00", "%1f" ...) of these characters.</p>
   * @param string $replacement [optional] <p>The replacement for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // The hex digits of a percent-encoded byte are case-insensitive ("%0B" equals "%0b"),
      // hence the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is replaced anymore: a removal can join the remaining
    // characters into a new matching sequence, e.g. "%0%0bb" => "%0b"
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3784 14
3785 1
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // mbstring gets the keyword "none" when the replacement is the empty string
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['mbstring'] === false) {
        trigger_error('UTF8::replace_diamond_question_mark() without mbstring cannot handle all chars correctly', E_USER_WARNING);
      }

      // The substitute character is a global mbstring setting:
      // remember the current value and restore it after the conversion.
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      /** @noinspection CallableParameterUseCaseInTypeContextInspection */
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replace = array(
        $replacementChar,
        $replacementChar,
    );

    return str_replace($search, $replace, $str);
  }
3835 1
3836
  /**
   * Strip whitespace or other characters from the end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped. If omitted (or empty), all trailing
   *                      characters matching the Unicode classes \pZ and \pC are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // The string "0" is a valid character to strip, but it is falsy in PHP,
    // so it must be excluded from the "no characters given" check.
    $useDefaultChars = $chars === INF || ($chars !== '0' && !$chars);

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3859 2
3860
  /**
   * Builds a regex fragment (without delimiters) that matches any one of the given characters.
   *
   * The result is either a single character class "[...]" or a non-capturing
   * alternation "(?:[...]|x|y)"; results are memoized per "$s . $class" combination.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // index 0 collects the character class, further entries are separate alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen is put in front of the class
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // less than three bytes: append in its regex-quoted form
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        // longer byte sequence that still is one single character
        $parts[0] .= $char;
        continue;
      }

      // everything else becomes an alternative of its own
      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $pattern;

    return $pattern;
  }
3908
3909
  /**
   * WARNING: Echoes the collected support information (one entry per line), e.g. for debugging.
   */
  public static function showSupport()
  {
    // Run the support check lazily, only if it has not been done yet.
    if (isset(self::$support['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    foreach (self::$support as $supportInfo) {
      echo $supportInfo, "\n<br>";
    }
  }
3922
3923
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII input is returned untouched when the caller asked for it.
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // Only non-default encodings are normalized.
    $targetEncoding = $encoding;
    if ($targetEncoding !== 'UTF-8') {
      $targetEncoding = self::normalize_encoding($targetEncoding);
    }

    return '&#' . self::ord($char, $targetEncoding) . ';';
  }
3955
3956
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Clean before choosing a code path, so the flag is honored by the fallback as well.
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $ret = array();

    if (self::$support['pcre_utf8'] === true) {

      preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: walk the bytes and collect sequences by their lead byte;
      // a lead byte without matching continuation bytes produces no element

      $len = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {
        if (($str[$i] & "\x80") === "\x00") {
          // single byte (0xxxxxxx)
          $ret[] = $str[$i];
        } elseif ((($str[$i] & "\xE0") === "\xC0") && isset($str[$i + 1])) {
          // two bytes (110xxxxx 10xxxxxx)
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }
        } elseif ((($str[$i] & "\xF0") === "\xE0") && isset($str[$i + 2])) {
          // three bytes (1110xxxx 10xxxxxx 10xxxxxx)
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80")) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }
        } elseif ((($str[$i] & "\xF8") === "\xF0") && isset($str[$i + 3])) {
          // four bytes (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
          if ((($str[$i + 1] & "\xC0") === "\x80") && (($str[$i + 2] & "\xC0") === "\x80") && (($str[$i + 3] & "\xC0") === "\x80")) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }
        }
      }
    }

    if ($length > 1) {
      // Group the single characters into chunks of "$length" and glue each chunk together.
      $ret = array_chunk($ret, $length);

      return array_map(
          function ($item) {
            return implode('', $item);
          }, $ret
      );
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4042
4043
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally an "iconv()" round trip per known encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each detector is called once and its result is reused
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the input from that encoding to itself
    // gives back the input unchanged. The strings are compared directly; a failed
    // conversion (false) is cast to an empty string.

    $strAsString = (string)$str;
    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if ((string)@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str) === $strAsString) {
        return $encodingTmp;
      }
    }

    return false;
  }
4135
4136
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Compare the needle with the tail of the haystack that has the same character length.
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
4159
4160
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Compare the needle with the tail of the haystack that has the same character length.
    $tail = self::substr($haystack, -self::strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4183
4184 9
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The replacement is inserted literally: "$1", "\1" and similar sequences
   * are not treated as regex back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value, or an array of replacement values.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every search term into a case-insensitive, UTF-8 aware pattern.
    $patterns = array();
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ('' === $term) {
        // an empty search term must never match: this pattern cannot match anything
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($term, '/') . '/ui';
      }
    }

    // "preg_replace()" interprets "\" and "$" in the replacement, so escape both.
    $escapeMap = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      foreach ($replace as $key => $value) {
        $replace[$key] = strtr((string)$value, $escapeMap);
      }
    } else {
      $replace = strtr((string)$replace, $escapeMap);
    }

    return preg_replace($patterns, $replace, $subject, -1, $count);
  }
4227
4228
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // The needle is a prefix only if it is found at position 0.
    return self::stripos($haystack, $needle) === 0;
  }
4251
4252 3
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters.</p>
   * @param string $strAddOn <p>The string that is appended if the input was shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // Short enough: nothing to do.
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // The limit falls exactly on a space: cut right before it.
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // Cut at the limit and drop the last, possibly incomplete, word.
    $truncated = self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $keptWords = implode(' ', $words);

    // No complete word is left: fall back to a hard cut.
    if ($keptWords === '') {
      return self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $keptWords . $strAddOn;
  }
4292
4293
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if "$pad_length" is not an integer, is not
   * positive, is smaller than the length of "$str", or if "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) === true
        &&
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string);

      // An empty pad string cannot fill anything and would cause a division by zero below.
      if (!$ps_length) {
        return $str;
      }

      $diff = $pad_length - $str_length;

      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // The left side gets the smaller half, the right side the larger one.
          $repeat = (int)ceil($diff / $ps_length / 2);
          $pre = str_repeat($pad_string, $repeat);
          $pre = self::substr($pre, 0, (int)floor($diff / 2));
          $post = str_repeat($pad_string, $repeat);
          $post = self::substr($post, 0, (int)ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
4347
4348
  /**
   * Repeat a string.
   *
   * @param string $str        <p>
   *                           The string to be repeated; it is passed through "UTF8::filter()" first.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4372 6
4373
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
  }
4406 61
4407 61
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject is returned unchanged if the term was not found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstPos = self::strpos($subject, $search);

    if ($firstPos === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstPos, self::strlen($search));
  }
4426
4427
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    // Shuffle on character level, never on byte level.
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4442
4443
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codepoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicate values
      $codepoints = array_flip(array_flip($codepoints));
    }

    if (!$desc) {
      asort($codepoints);
    } else {
      arsort($codepoints);
    }

    return self::string($codepoints);
  }
4468
4469 1
  /**
   * Split a string into an array.
   *
   * The string is split with the grapheme cluster pattern of the "Grapheme" polyfill.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of elements that are glued together per chunk.</p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $len = (int)$len;
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // Invalid lengths are handed over to the native function.
    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len === 0) {
        // every "$len"-th element opens a new chunk
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
4513 1
4514
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // The needle is a prefix only if it is found at position 0.
    return self::strpos($haystack, $needle) === 0;
  }
4537 15
4538 15
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big number and written in base 2,
   * without leading zeros ("0" for an empty or all-zero input).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $value = unpack('H*', $str);
    $hex = isset($value[1]) ? (string)$value[1] : '';

    // Convert digit by digit: "base_convert()" goes through a float and
    // loses precision for anything longer than a few bytes.
    $bits = '';
    $hexLength = strlen($hex);
    for ($i = 0; $i < $hexLength; $i++) {
      $bits .= sprintf('%04b', hexdec($hex[$i]));
    }

    // same output format as "base_convert()": no leading zeros, but never empty
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4553 14
4554
  /**
   * Convert a string into an array of words.
   *
   * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the matched words are
   * kept in the result next to the text between them.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charlist <p>Additional characters that are treated as part of a word.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    if ($str === '') {
      return array('');
    }

    // letters (\pL) plus the additional characters
    $charlist = self::rxClass($charlist, '\pL');

    $pattern = "/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u";

    return \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  }
4574 14
4575 14
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4590
4591
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words themselves (see "$format").</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // The words are read from the odd indexes of the parts.
    $parts = self::str_to_words($str, $charlist);
    $total = count($parts);

    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $total; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();
      // the first word starts after the leading part
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $total; $i += 2) {
        $words[$offset] = $parts[$i];
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($total - 1) / 2;
  }
4634
4635
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    // Case-fold both sides, then compare them case-sensitively.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4653
4654
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4671
4672
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are not identical are compared in their NFD-normalized form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // Identical strings need no normalization.
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4692
4693
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the segment.</p>
   * @param int    $offset   <p>Start position inside "$str".</p>
   * @param int    $length   <p>Length of the part of "$str" that is examined.</p>
   *
   * @return int|null <p><strong>null</strong> if "$charList" or the examined string is empty.</p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // Only cut the string if the caller changed the offset or the length.
    if ($offset || 2147483647 !== $length) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // Capture everything in front of the first character from the list.
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    return self::strlen($str);
  }
4725
4726
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4743
4744 10
  /**
4745 10
   * Create a UTF-8 string from code points.
4746
   *
4747
   * INFO: opposite to UTF8::codepoints()
4748
   *
4749 10
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
4750
   *
4751 10
   * @return string <p>UTF-8 encoded string.</p>
4752
   */
4753
  public static function string(array $array)
  {
    // Convert each code point with UTF8::chr() and append the results in input order.
    $result = '';
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4766 10
4767 10
  /**
4768 10
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
4769 1
   *
4770
   * @param string $str <p>The input string.</p>
4771
   *
4772
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
4773
   */
4774 10
  public static function string_has_bom($str)
  {
    // The BOM byte sequences are the keys of self::$bom; the values are not needed here.
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4784
4785
  /**
4786
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
4787
   *
4788
   * @link http://php.net/manual/en/function.strip-tags.php
4789
   *
4790
   * @param string  $str            <p>
4791
   *                                The input string.
4792
   *                                </p>
4793
   * @param string  $allowable_tags [optional] <p>
4794
   *                                You can use the optional second parameter to specify tags which should
4795
   *                                not be stripped.
4796
   *                                </p>
4797
   *                                <p>
4798
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
4799
   *                                can not be changed with allowable_tags.
4800
   *                                </p>
4801
   * @param boolean $cleanUtf8      [optional] <p>Clean non UTF-8 chars from the string.</p>
4802
   *
4803
   * @return string <p>The stripped string.</p>
4804
   */
4805
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    // Optionally run the input through UTF8::clean() before delegating to the native function.
    $input = $cleanUtf8 ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4813 10
4814
  /**
4815
   * Finds position of first occurrence of a string within another, case insensitive.
4816 10
   *
4817 10
   * @link http://php.net/manual/en/function.mb-stripos.php
4818
   *
4819 10
   * @param string  $haystack  <p>
4820 2
   *                           The string from which to get the position of the first occurrence
4821 2
   *                           of needle
4822
   *                           </p>
4823 10
   * @param string  $needle    <p>
4824 10
   *                           The string to find in haystack
4825 2
   *                           </p>
4826
   * @param int     $offset    [optional] <p>
4827
   *                           The position in haystack
4828 8
   *                           to start searching
4829
   *                           </p>
4830
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4831
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4832
   *
4833
   * @return int|false <p>
4834
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
4835
   *                   or false if needle is not found.
4836
   *                   </p>
4837
   */
4838
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be found in (or with) an empty string
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // The default offset is null, but "grapheme_stripos()" and "mb_stripos()"
    // expect an integer; normalize it here (null becomes 0).
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        self::$support['intl'] === true
        &&
        Bootup::is_php('5.4')
    ) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
4881
4882 11
  /**
4883
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
4884 11
   *
4885 11
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
4886 11
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
4887
   * @param bool    $before_needle [optional] <p>
4888 11
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
4889 1
   *                               haystack before the first occurrence of the needle (excluding the needle).
4890 1
   *                               </p>
4891 1
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4892
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
4893 11
   *
4894
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
4895 11
   */
4896
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or needle can never produce a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $mbstringSupport = self::$support['mbstring'];

    // the non-mbstring code paths below ignore $encoding, so warn about it
    if ($mbstringSupport === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($mbstringSupport === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (self::$support['intl'] === true) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // vanilla php: lazily capture everything in front of the first
    // case-insensitive occurrence of the (quoted) needle
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/usi';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    return $before_needle
        ? $match[1]
        : self::substr($haystack, self::strlen($match[1]));
  }
4948
4949
  /**
4950 3
   * Get the string length, not the byte-length!
4951
   *
4952 3
   * @link     http://php.net/manual/en/function.mb-strlen.php
4953
   *
4954
   * @param string  $str       <p>The string being checked for length.</p>
4955
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4956
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4957
   *
4958
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
4959
   *             character counted as +1)</p>
4960
   */
4961
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // for these encodings the plain byte count is returned
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
        &&
        self::$support['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // without mbstring, iconv is the only extension here that takes an $encoding
    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['iconv'] === true
        &&
        self::$support['mbstring'] === false
    ) {
      $returnTmp = \iconv_strlen($str, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$support['intl'] === true) {
      $str = self::clean($str);
      $returnTmp = \grapheme_strlen($str);
      // "grapheme_strlen()" reports failure with false or null,
      // so only accept a real integer as the length
      if (is_int($returnTmp)) {
        return $returnTmp;
      }
    }

    if (self::$support['iconv'] === true) {
      $returnTmp = \iconv_strlen($str, $encoding);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php
    preg_match_all('/./us', $str, $parts);
    $returnTmp = count($parts[0]);
    if ($returnTmp !== 0) {
      return $returnTmp;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str);
  }
5045
5046
  /**
5047
   * Case insensitive string comparisons using a "natural order" algorithm.
5048
   *
5049
   * INFO: natural order version of UTF8::strcasecmp()
5050
   *
5051
   * @param string $str1 <p>The first string.</p>
5052
   * @param string $str2 <p>The second string.</p>
5053
   *
5054
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
5055
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
5056
   *             <strong>0</strong> if they are equal
5057
   */
5058
  public static function strnatcasecmp($str1, $str2)
  {
    // Case-fold both operands first, then compare them in natural order.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5062 47
5063
  /**
5064 47
   * String comparisons using a "natural order" algorithm
5065 9
   *
5066
   * INFO: natural order version of UTF8::strcmp()
5067
   *
5068 45
   * @link  http://php.net/manual/en/function.strnatcmp.php
5069
   *
5070
   * @param string $str1 <p>The first string.</p>
5071
   * @param string $str2 <p>The second string.</p>
5072 1
   *
5073 1
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
5074
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
5075 45
   *             <strong>0</strong> if they are equal
5076 45
   */
5077 37
  public static function strnatcmp($str1, $str2)
  {
    // Identical strings (after string coercion) are equal without any folding.
    if (($str1 . '') === ($str2 . '')) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5081 2
5082
  /**
5083
   * Case-insensitive string comparison of the first n characters.
5084 43
   *
5085 20
   * @link  http://php.net/manual/en/function.strncasecmp.php
5086 20
   *
5087 41
   * @param string $str1 <p>The first string.</p>
5088
   * @param string $str2 <p>The second string.</p>
5089
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
5090 43
   *
5091
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
5092
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
5093
   *             <strong>0</strong> if they are equal
5094
   */
5095
  public static function strncasecmp($str1, $str2, $len)
  {
    // Case-fold both operands first, then compare their first $len characters.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5099 43
5100 43
  /**
5101 1
   * String comparison of the first n characters.
5102
   *
5103
   * @link  http://php.net/manual/en/function.strncmp.php
5104 43
   *
5105 43
   * @param string $str1 <p>The first string.</p>
5106
   * @param string $str2 <p>The second string.</p>
5107
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
5108
   *
5109
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
5110
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
5111
   *             <strong>0</strong> if they are equal
5112
   */
5113
  public static function strncmp($str1, $str2, $len)
  {
    // "UTF8::substr()" may return false instead of a string, so cast
    // explicitly: "UTF8::strcmp()" is only documented to accept strings.
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5120
5121
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also false when haystack or char_list is empty).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    // find the first character of the haystack that is part of the char-list
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m)) {
      return false;
    }

    // return everything from the first occurrence of the matched character
    return substr($haystack, strpos($haystack, $m[0]));
  }
5146
5147
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string being checked.</p>
   * @param string|int $needle    <p>
   *                              The string to find in haystack. A non-negative integer is converted
   *                              into a character via UTF8::chr() first.
   *                              </p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle; this has to be
    // checked before the string cast below, otherwise the check is never true
    if (is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // warn only if neither extension that understands $encoding is available
    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['iconv'] === false
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
        &&
        self::$support['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$support['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    // fallback via vanilla php

    // a negative offset counts from the end of the haystack: turn it into an
    // absolute character offset so the result is relative to the full haystack
    if ($offset < 0) {
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    $haystackTail = (string)self::substr($haystack, $offset);

    $pos = strpos($haystackTail, $needle);
    if ($pos === false) {
      return false;
    }

    // $pos is a byte position: convert the bytes in front of the match into a
    // character count and add the characters that were skipped via $offset
    return $offset + self::strlen(substr($haystackTail, 0, $pos));
  }
5268 6
5269
  /**
5270
   * Finds the last occurrence of a character in a string within another.
5271
   *
5272
   * @link http://php.net/manual/en/function.mb-strrchr.php
5273
   *
5274
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
5275
   * @param string $needle        <p>The string to find in haystack</p>
5276
   * @param bool   $before_needle [optional] <p>
5277
   *                              Determines which portion of haystack
5278
   *                              this function returns.
5279
   *                              If set to true, it returns all of haystack
5280 1
   *                              from the beginning to the last occurrence of needle.
5281
   *                              If set to false, it returns all of haystack
5282 1
   *                              from the last occurrence of needle to the end,
5283
   *                              </p>
5284 1
   * @param string $encoding      [optional] <p>
5285 1
   *                              Character encoding name to use.
5286
   *                              If it is omitted, internal character encoding is used.
5287
   *                              </p>
5288 1
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5289 1
   *
5290 1
   * @return string|false The portion of haystack or false if needle is not found.
5291
   */
5292 1 View Code Duplication
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default encoding names are run through the normalizer
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $result = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
5308 1
5309
  /**
5310 1
   * Reverses characters order in the string.
5311
   *
5312 1
   * @param string $str The input string
5313
   *
5314
   * @return string The string with characters in the reverse sequence
5315
   */
5316
  public static function strrev($str)
  {
    $str = (string)$str;

    // nothing to reverse
    if ($str === '') {
      return '';
    }

    // split into pieces via UTF8::split(), flip their order and glue them back together
    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
5326 7
5327
  /**
5328 7
   * Finds the last occurrence of a character in a string within another, case insensitive.
5329
   *
5330
   * @link http://php.net/manual/en/function.mb-strrichr.php
5331
   *
5332
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
5333
   * @param string  $needle        <p>The string to find in haystack.</p>
5334
   * @param bool    $before_needle [optional] <p>
5335
   *                               Determines which portion of haystack
5336
   *                               this function returns.
5337
   *                               If set to true, it returns all of haystack
5338
   *                               from the beginning to the last occurrence of needle.
5339
   *                               If set to false, it returns all of haystack
5340 1
   *                               from the last occurrence of needle to the end,
5341
   *                               </p>
5342 1
   * @param string  $encoding      [optional] <p>
5343
   *                               Character encoding name to use.
5344
   *                               If it is omitted, internal character encoding is used.
5345
   *                               </p>
5346
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5347
   *
5348
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
5349
   */
5350 View Code Duplication
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default encoding names are run through the normalizer
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $result = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
5365
5366
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Lookup order: "\mb_strripos" when mbstring is available, then
   * "\grapheme_strripos" when intl is available, and finally a vanilla
   * fallback that lower-cases both operands and delegates to UTF8::strrpos().
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is treated as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // nothing can be found in / with an empty string
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if (
        $cleanUtf8 === true
        ||
        $encoding === true // INFO: the "bool"-check is only a fallback for old versions
    ) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['intl'] === true) {
      $returnTmp = \grapheme_strripos($haystack, $needle, $offset);
      if ($returnTmp !== false) {
        return $returnTmp;
      }
    }

    // fallback via vanilla php
    //
    // Both operands are lower-cased so that the case-sensitive strrpos() search
    // behaves case-insensitively. (Stripping combining marks, as done before,
    // does not fold case and made the search accent-insensitive instead.)
    return self::strrpos(
        self::strtolower($haystack, $encoding),
        self::strtolower($needle, $encoding),
        $offset,
        $encoding,
        $cleanUtf8
    );
  }
5443 2
5444
  /**
   * Find position of last occurrence of a string in a string.
   *
   * Tries "\mb_strrpos" (mbstring) and then "\grapheme_strrpos" (intl); when
   * neither yields a position, a byte-wise search is done and the byte offset
   * is converted back into a character offset.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
   *                   is not found, it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is treated as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the "bool"-check on $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== true && $encoding !== false) {
      $encoding = self::normalize_encoding($encoding);
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      $found = \mb_strrpos($haystack, $needle, $offset, $encoding);
      if ($found !== false) {
        return $found;
      }
    }

    if (self::$support['intl'] === true) {
      $found = \grapheme_strrpos($haystack, $needle, $offset);
      if ($found !== false) {
        return $found;
      }
    }

    // fallback via vanilla php: narrow the haystack according to the offset ...
    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    // ... search byte-wise ...
    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // ... and count the characters in front of the match.
    $prefix = substr($haystack, 0, $bytePos);

    return $offset + self::strlen($prefix);
  }
5538
5539
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional]
   * @param int    $length [optional]
   *
   * @return int
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    $offset = (int)$offset;
    $length = (int)$length;

    // slice the input only if the caller narrowed the default range
    if ($offset !== 0 || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // match the leading run of mask characters
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5567
5568 20
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Tries "\mb_strstr" (mbstring) and then "\grapheme_strstr" (intl); when
   * neither yields a result, a regular expression is used as last resort.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      $result = \mb_strstr($haystack, $needle, $before_needle, $encoding);
      if ($result !== false) {
        return $result;
      }
    }

    if (self::$support['intl'] === true) {
      $result = \grapheme_strstr($haystack, $needle, $before_needle);
      if ($result !== false) {
        return $result;
      }
    }

    // last resort: capture (non-greedy) everything in front of the first needle
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $head = $match[1];

    return $before_needle ? $head : self::substr($haystack, self::strlen($head));
  }
5640 4
5641 2
  /**
   * Unicode transformation for case-less matching.
   *
   * Applies the replacements from UTF8::$commonCaseFold, optionally the
   * "caseFolding_full" data table, and finally lower-cases the result.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, replace full case folding chars (default)<br />
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // per-request caches, filled on first use
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$commonCaseFold);
      $commonFoldReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search list, index 1 the replacement list
      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5692 5
5693
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string  $str       <p>The string being lowercased.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $input = (string)$str;

    // empty input: nothing to convert
    if (!isset($input[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before handing the string to "\mb_strtolower"
      $input = self::clean($input);
    }

    // only a non-default encoding name is sent through the normaliser
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return \mb_strtolower($input, $charset);
  }
5725
5726
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed to normalization form D and every run of
   * non-spacing marks (\p{Mn}) is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5738
5739
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string  $str       <p>The string being uppercased.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $input = (string)$str;

    // empty input: nothing to convert
    if (!isset($input[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before handing the string to "\mb_strtoupper"
      $input = self::clean($input);
    }

    // only a non-default encoding name is sent through the normaliser
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return \mb_strtoupper($input, $charset);
  }
5770
5771 1
  /**
   * Translate characters or replace sub-strings.
   *
   * With three arguments, $from and $to are paired up character by character
   * (or element by element when arrays are given); the longer side is
   * truncated to the length of the shorter one. With two arguments, $from is
   * handed to the native strtr() unchanged as its replace-pairs argument.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // Only strings need to be split into characters; arrays already are
      // lists of search / replacement items and must not go through str_split().
      if (!is_array($from)) {
        $from = self::str_split($from);
      }

      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // array_combine() requires both lists to have the same length
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    return strtr($str, $from);
  }
5804
5805 1
  /**
   * Return the width of a string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default encoding name is sent through the normaliser
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($str, $charset);
  }
5829
5830 1
  /**
   * Get part of a string.
   *
   * Lookup order: "\mb_substr" (mbstring), "\iconv_substr" (iconv, only for
   * non-negative lengths), "\grapheme_substr" (intl) and finally a vanilla
   * fallback that splits the string into characters and slices the array.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $start     <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>
   *                      Returns a sub-string specified by the start and length parameters,<br />an empty string
   *                      if str is empty, or <strong>false</strong> if start lies behind the end of str.
   *                      </p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed for the range check and the default length
    $str_length = 0;
    if ($start || $length === null) {
      $str_length = (int)self::strlen($str);
    }

    if ($start && $start > $str_length) {
      return false;
    }

    if ($length === null) {
      $length = $str_length;
    } else {
      $length = (int)$length;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$support['iconv'] === true
    ) {
      // The charset is passed explicitly: without it "iconv_substr()" uses the
      // configured iconv internal encoding, which is not guaranteed to be UTF-8.
      return \iconv_substr($str, $start, $length, 'UTF-8');
    }

    if (self::$support['intl'] === true) {
      return \grapheme_substr($str, $start, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($array, $start, $length));
  }
5919
5920
  /**
5921
   * Binary safe comparison of two strings from an offset, up to length characters.
5922
   *
5923
   * @param string  $main_str           <p>The main string being compared.</p>
5924
   * @param string  $str                <p>The secondary string being compared.</p>
5925
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
5926
   *                                    the end of the string.</p>
5927
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
5928
   *                                    the length of the str compared to the length of main_str less the offset.</p>
5929
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5930
   *                                    insensitive.</p>
5931
   *
5932
   * @return int
5933
   */
5934
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
5935
  {
5936
    $main_str = self::substr($main_str, $offset, $length);
5937
    $str = self::substr($str, 0, self::strlen($main_str));
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5936 can also be of type false; however, voku\helper\UTF8::strlen() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5938
5939
    return $case_insensitivity === true ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5936 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5937 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5936 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5937 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5940
  }
5941
5942
  /**
5943
   * Count the number of substring occurrences.
5944
   *
5945
   * @link  http://php.net/manual/en/function.substr-count.php
5946
   *
5947
   * @param string  $haystack  <p>The string to search in.</p>
5948
   * @param string  $needle    <p>The substring to search for.</p>
5949
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
5950
   * @param int     $length    [optional] <p>
5951
   *                           The maximum length after the specified offset to search for the
5952
   *                           substring. It outputs a warning if the offset plus the length is
5953
   *                           greater than the haystack length.
5954
   *                           </p>
5955
   * @param string  $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
5956
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5957
   *
5958
   * @return int|false <p>This function returns an integer, or false if the haystack or the needle is empty.</p>
5959
   */
5960
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Normalise both inputs to strings; an empty haystack or needle cannot be counted.
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Narrow the haystack only when the caller asked for a window.
    // "$length !== null" (instead of a truthy check) keeps an explicit length of 0 meaningful.
    if ($offset || $length !== null) {
      $offset = (int)$offset;

      if ($length === null) {
        // No length given: search from $offset to the end of the haystack.
        // Casting null to int would give 0 and silently empty the haystack.
        // NOTE(review): assumes self::strlen() signals failure with false - confirm.
        $lengthTmp = self::strlen($haystack);
        if ($lengthTmp === false) {
          return false;
        }
        $length = (int)$lengthTmp;
      } else {
        $length = (int)$length;
      }

      if (
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // self::substr() can return false; treat that as "nothing left to search in".
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      $haystack = $haystackTmp === false ? '' : (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // Invalid byte sequences are removed from both strings before counting.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$support['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // Fallback without mbstring: count literal (quoted) matches of the needle.
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6016
6017
  /**
6018
   * Removes a prefix ($needle) from the start of the string ($haystack), case-insensitive.
6019
   *
6020
   * @param string $haystack <p>The string to search in.</p>
6021
   * @param string $needle   <p>The substring to search for.</p>
6022
   *
6023
   * @return string <p>Return the sub-string.</p>
6024
   */
6025 View Code Duplication
  public static function substr_ileft($haystack, $needle)
  {
    $subject = (string)$haystack;
    $prefix = (string)$needle;

    // An empty subject has nothing to strip.
    if ($subject === '') {
      return '';
    }

    // Nothing to remove: empty prefix, or the subject does not start with it (case-insensitive check).
    if ($prefix === '' || self::str_istarts_with($subject, $prefix) !== true) {
      return $subject;
    }

    // Drop as many leading characters as the prefix is long.
    return self::substr($subject, self::strlen($prefix));
  }
6045
6046
  /**
6047
   * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
6048
   *
6049
   * @param string $haystack <p>The string to search in.</p>
6050
   * @param string $needle   <p>The substring to search for.</p>
6051
   *
6052
   * @return string <p>Return the sub-string.</p>
6053
   */
6054 View Code Duplication
  public static function substr_iright($haystack, $needle)
  {
    $subject = (string)$haystack;
    $suffix = (string)$needle;

    // An empty subject has nothing to strip.
    if ($subject === '') {
      return '';
    }

    // Nothing to remove: empty suffix, or the subject does not end with it (case-insensitive check).
    if ($suffix === '' || self::str_iends_with($subject, $suffix) !== true) {
      return $subject;
    }

    // Keep everything except the trailing characters covered by the suffix.
    $keep = self::strlen($subject) - self::strlen($suffix);

    return self::substr($subject, 0, $keep);
  }
6074 6
6075
  /**
6076 6
   * Removes a prefix ($needle) from the start of the string ($haystack).
6077 3
   *
6078
   * @param string $haystack <p>The string to search in.</p>
6079
   * @param string $needle   <p>The substring to search for.</p>
6080
   *
6081 6
   * @return string <p>Return the sub-string.</p>
6082
   */
6083 6 View Code Duplication
  public static function substr_left($haystack, $needle)
  {
    $subject = (string)$haystack;
    $prefix = (string)$needle;

    // An empty subject has nothing to strip.
    if ($subject === '') {
      return '';
    }

    // Nothing to remove: empty prefix, or the subject does not start with it.
    if ($prefix === '' || self::str_starts_with($subject, $prefix) !== true) {
      return $subject;
    }

    // Drop as many leading characters as the prefix is long.
    return self::substr($subject, self::strlen($prefix));
  }
6103 6
6104
  /**
6105
   * Replace text within a portion of a string.
6106 5
   *
6107 5
   * source: https://gist.github.com/stemar/8287074
6108
   *
6109 5
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
6110 1
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
6111 1
   * @param int|int[]       $start            <p>
6112 1
   *                                          If start is positive, the replacing will begin at the start'th offset
6113
   *                                          into string.
6114 5
   *                                          <br /><br />
6115
   *                                          If start is negative, the replacing will begin at the start'th character
6116
   *                                          from the end of string.
6117
   *                                          </p>
6118
   * @param int|int[]|void  $length           [optional] <p>If given and is positive, it represents the length of the
6119
   *                                          portion of string which is to be replaced. If it is negative, it
6120
   *                                          represents the number of characters from the end of string at which to
6121
   *                                          stop replacing. If it is not given, then it will default to strlen(
6122
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
6123
   *                                          length is zero then this function will have the effect of inserting
6124
   *                                          replacement into string at the given start offset.</p>
6125
   *
6126
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
6127
   */
6128
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      // Array mode: bring every argument to one entry per input string,
      // then process each string through this same method.
      $num = count($str);

      // $replacement
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: array entries that are not real integers fall back to offset 0.
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        // No length given: hand null to every per-string call, so each one
        // replaces up to the end of its own string (as documented) instead of
        // degrading to a pure insertion with length 0.
        // array_pad() is also safe for $num === 0, unlike array_fill() on old PHP versions.
        $length = array_pad(array(), $num, null);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, one invocation per input string.
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // Scalar mode: only the first replacement of an array is used.
    // reset() also works when the array has no key 0.
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? reset($replacement) : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // Split both strings into characters ("/u"), so offsets count characters and not bytes.
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6201 10
6202
  /**
6203 10
   * Removes a suffix ($needle) from the end of the string ($haystack).
6204 10
   *
6205
   * @param string $haystack <p>The string to search in.</p>
6206 10
   * @param string $needle   <p>The substring to search for.</p>
6207 3
   *
6208
   * @return string <p>Return the sub-string.</p>
6209
   */
6210 8 View Code Duplication
  public static function substr_right($haystack, $needle)
  {
    $subject = (string)$haystack;
    $suffix = (string)$needle;

    // An empty subject has nothing to strip.
    if ($subject === '') {
      return '';
    }

    // Nothing to remove: empty suffix, or the subject does not end with it.
    if ($suffix === '' || self::str_ends_with($subject, $suffix) !== true) {
      return $subject;
    }

    // Keep everything except the trailing characters covered by the suffix.
    $keep = self::strlen($subject) - self::strlen($suffix);

    return self::substr($subject, 0, $keep);
  }
6229 8
6230 8
  /**
6231
   * Returns a case swapped version of the string.
6232 8
   *
6233 8
   * @param string  $str       <p>The input string.</p>
6234 8
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6235 8
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
6236
   *
6237 8
   * @return string <p>Each character's case swapped.</p>
6238 6
   */
6239 6
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // Remove invalid byte sequences before matching with the "/u" modifier.
      $str = self::clean($str);
    }

    // Visit every non-whitespace character: if upper-casing leaves it unchanged,
    // return its lower-case form, otherwise return the upper-case form.
    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = UTF8::strtoupper($char, $encoding);

          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6273
6274
  /**
6275
   * alias for "UTF8::to_ascii()"
6276
   *
6277
   * @see UTF8::to_ascii()
6278
   *
6279
   * @param string $s
6280
   * @param string $subst_chr
6281
   * @param bool   $strict
6282
   *
6283
   * @return string
6284
   *
6285
   * @deprecated
6286
   */
6287
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    // Deprecated camelCase alias: forwards all arguments unchanged to to_ascii().
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6291
6292
  /**
6293
   * alias for "UTF8::to_iso8859()"
6294
   *
6295
   * @see UTF8::to_iso8859()
6296
   *
6297
   * @param string $str
6298
   *
6299
   * @return string|string[]
6300
   *
6301
   * @deprecated
6302
   */
6303
  public static function toIso8859($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to to_iso8859().
    $converted = self::to_iso8859($str);

    return $converted;
  }
6307
6308
  /**
6309
   * alias for "UTF8::to_latin1()"
6310
   *
6311
   * @see UTF8::to_latin1()
6312
   *
6313
   * @param $str
6314
   *
6315
   * @return string
6316
   *
6317
   * @deprecated
6318
   */
6319
  public static function toLatin1($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to to_latin1().
    $converted = self::to_latin1($str);

    return $converted;
  }
6323
6324
  /**
6325
   * alias for "UTF8::to_utf8()"
6326
   *
6327
   * @see UTF8::to_utf8()
6328
   *
6329
   * @param string $str
6330
   *
6331
   * @return string
6332
   *
6333
   * @deprecated
6334
   */
6335
  public static function toUTF8($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to to_utf8().
    $converted = self::to_utf8($str);

    return $converted;
  }
6339
6340
  /**
6341
   * Convert a string into ASCII.
6342
   *
6343
   * @param string $str     <p>The input string.</p>
6344
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
6345
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
6346
   *                        performance</p>
6347
   *
6348
   * @return string
6349
   *
6350
   * @throws \Exception
6351
   */
6352
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // Cache of transliteration tables, indexed by "bank" (the code point shifted right by 8 bits).
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, true, true, true);

    // Fast path: nothing to transliterate.
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] !== true || !Bootup::is_php('5.4')) {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }

      // HACK for issue from "transliterator_transliterate()"
      $str = str_replace('ℌ', 'H', $str);

      // transliterator_transliterate() returns false on failure; in that case keep
      // the input and let the table-based conversion below handle it.
      $transliterated = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);
      if ($transliterated !== false) {
        $str = $transliterated;
      }

      // check again, if we only have ASCII, now ...
      if (self::is_ascii($str) === true) {
        return $str;
      }
    }

    // Split into single characters; each one is replaced in place below.
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $lead = ord($c[0]);

      // ASCII - next please
      if ($lead <= 127) {
        continue;
      }

      // Derive the sequence length and the payload bits of the lead byte.
      // Lead bytes outside these ranges (128-191, 254, 255) cannot start a character.
      if ($lead >= 192 && $lead <= 223) {
        $bytes = 2;
        $ord = $lead - 192;
      } elseif ($lead >= 224 && $lead <= 239) {
        $bytes = 3;
        $ord = $lead - 224;
      } elseif ($lead >= 240 && $lead <= 247) {
        $bytes = 4;
        $ord = $lead - 240;
      } elseif ($lead >= 248 && $lead <= 251) {
        $bytes = 5;
        $ord = $lead - 248;
      } elseif ($lead >= 252 && $lead <= 253) {
        $bytes = 6;
        $ord = $lead - 252;
      } else {
        $c = $unknown;
        continue;
      }

      // Truncated sequence: do not read past the end of the character.
      if (!isset($c[$bytes - 1])) {
        $c = $unknown;
        continue;
      }

      // Fold in 6 payload bits per continuation byte. $ord is rebuilt for every
      // character, so no value can leak over from a previous iteration.
      for ($i = 1; $i < $bytes; $i++) {
        $ord = $ord * 64 + (ord($c[$i]) - 128);
      }

      // Load the lookup table for this bank on first use; a missing table becomes an empty one.
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // Break the reference so later writes to $c cannot modify the last array element.
    unset($c);

    return implode('', $chars);
  }
6495
6496
  /**
6497
   * Convert a string into "ISO-8859"-encoding (Latin-1).
6498
   *
6499
   * @param string|string[] $str
6500
   *
6501
   * @return string|string[]
6502
   */
6503
  public static function to_iso8859($str)
  {
    // Single value: cast to string and decode, or return '' for empty input.
    if (!is_array($str)) {
      $input = (string)$str;

      return $input === '' ? '' : self::utf8_decode($input);
    }

    // Array: convert every element recursively, keeping the original keys.
    foreach (array_keys($str) as $key) {
      $str[$key] = self::to_iso8859($str[$key]);
    }

    return $str;
  }
6525
6526
  /**
6527
   * alias for "UTF8::to_iso8859()"
6528
   *
6529
   * @see UTF8::to_iso8859()
6530
   *
6531
   * @param string|string[] $str
6532
   *
6533
   * @return string|string[]
6534
   */
6535
  public static function to_latin1($str)
  {
    // Alias: forwards the argument unchanged to to_iso8859().
    $converted = self::to_iso8859($str);

    return $converted;
  }
6539
6540
  /**
6541
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
6542
   *
6543
   * <ul>
6544
   * <li>It decode UTF-8 codepoints and unicode escape sequences.</li>
6545
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.</li>
6546
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
6547
   * case.</li>
6548
   * </ul>
6549
   *
6550
   * @param string|string[] $str                    <p>Any string or array.</p>
6551
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
6552
   *
6553
   * @return string|string[] <p>The UTF-8 encoded string.</p>
6554
   */
6555
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // Arrays are converted element by element, keeping the original keys.
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    $max = strlen($str);
    $buf = '';

    // Walk the input byte by byte.
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xc0") { // should be converted to UTF8, if it's not UTF8 already

        // Number of continuation bytes this lead byte announces (0 = not a valid lead byte).
        if ($c1 <= "\xdf") {
          $extra = 1; // looks like 2 bytes UTF8
        } elseif ($c1 <= "\xef") {
          $extra = 2; // looks like 3 bytes UTF8
        } elseif ($c1 <= "\xf7") {
          $extra = 3; // looks like 4 bytes UTF8
        } else {
          $extra = 0; // doesn't look like UTF8, but should be converted
        }

        // Collect the sequence while every following byte is a continuation byte (10xxxxxx).
        // Bytes beyond the end of the input count as "\x00", i.e. as invalid.
        $sequence = $c1;
        $valid = $extra > 0;
        for ($j = 1; $valid && $j <= $extra; $j++) {
          $next = $i + $j < $max ? $str[$i + $j] : "\x00";
          if (($next & "\xc0") === "\x80") {
            $sequence .= $next;
          } else {
            $valid = false;
          }
        }

        if ($valid) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $extra;
        } else { // not valid UTF8 - convert this single byte into a two-byte sequence
          // ">> 6" keeps the arithmetic in integers (a float argument to chr() is deprecated in PHP 8.1+).
          $buf .= (chr(ord($c1) >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }

      } elseif (($c1 & "\xc0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= (chr($ordC1 >> 6) | "\xc0") . (($c1 & "\x3f") | "\x80");
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities, only if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
6657
6658
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than the native "trim()".
   *
   * The native function would only be safe for pure 7-bit (ASCII) input, but the
   * check for ASCII costs more time than we could save here.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped. When omitted (or empty),
   *                      Unicode separators (\pZ) and "other" characters (\pC) are stripped.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // The string "0" is falsy in PHP, yet it is a valid character list; it must
    // not be mistaken for "no character list given".
    $useDefaultClass = ($chars === INF) || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultClass) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    // NOTE(review): relies on ltrim()/rtrim() honouring the given list (including "0") - confirm.
    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
6686
6687
  /**
   * Makes string's first char uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // substr() can return false instead of a string; cast both pieces so that
    // strtoupper() and the concatenation below always work on real strings.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $remainder = (string)self::substr($str, 1, null, $encoding, $cleanUtf8);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $remainder;
  }
6700
6701
  /**
   * Alias for "UTF8::ucfirst()": upper-cases the first character of a single word.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The word to capitalize.</p>
   * @param string  $encoding  <p>Charset forwarded to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 <p>Clean-up flag forwarded to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure delegation, all arguments are passed through unchanged
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6716
6717
  /**
   * Uppercase for all words in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words (compared strictly, case-sensitive).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // The string "0" is falsy in PHP but is valid, non-empty input.
    if (!$str && $str !== '0') {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    // presumably str_to_words() yields the words together with the separators
    // between them, which is why the pieces are glued back with '' below
    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty fragments only - a word such as "0" has to survive
      if (!$word && $word !== '0') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
6766
6767
  /**
   * Decode url-encoded input and html entities (optionally repeatedly) & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string no longer changes).</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes into hex html entities, the loop below decodes them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only referenced when the running PHP version provides it
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    while (true) {
      $before = $str;

      $entityDecoded = self::html_entity_decode(self::to_utf8($str), $flags);
      $str = self::fix_simple_utf8(urldecode($entityDecoded));

      // a single pass unless multi-decoding was requested; otherwise stop once a pass changes nothing
      if ($multi_decode !== true || $before === $str) {
        break;
      }
    }

    return (string)$str;
  }
6817
6818
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * Keys are percent-encoded bytes ('%20' ... '%FF'), values are the matching
   * UTF-8 characters. Bytes without a printable Windows-1252 character map to ''.
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (it was wrongly mapped to a backtick)
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        // NOTE(review): 0xA0 would be expected to be a no-break space - confirm the empty value is intended
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        // NOTE(review): 0xAD would be expected to be a soft hyphen - confirm the empty value is intended
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
7056
7057
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalized via "UTF8::to_utf8()", then the sequences listed
   * in the class-level $utf8ToWin1252 map are replaced, and finally the result is
   * handed to the polyfill's "Xml::utf8_decode()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    $input = (string)$str;

    if (!isset($input[0])) {
      return '';
    }

    $input = (string)self::to_utf8($input);

    // search / replace lists are derived from the map once and kept for later calls
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$utf8ToWin1252);
      $replace = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($search, $replace, $input);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
7086
7087
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "\utf8_encode()" run, sequences listed in the class-level
   * $cp1252ToUtf8 map are replaced - but only if the encoded string contains a
   * 0xC2 byte at all.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    $input = (string)$str;

    if (!isset($input[0])) {
      return '';
    }

    $encoded = \utf8_encode($input);

    // without a 0xC2 byte the replacement step is skipped entirely
    // (presumably every key of $cp1252ToUtf8 starts with that byte - verify against the map)
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // search / replace lists are derived from the map once and kept for later calls
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$cp1252ToUtf8);
      $replace = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($search, $replace, $encoded);
  }
7120
7121
  /**
   * Fix utf8-win1252 chars (thin wrapper around "UTF8::fix_simple_utf8()").
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    // kept for backward compatibility only, the work happens in fix_simple_utf8()
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7134
7135
  /**
7136
   * Returns an array with all utf8 whitespace characters.
7137
   *
7138
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
7139
   *
7140
   * @author: Derek E. [email protected]
7141
   *
7142
   * @return array <p>
7143
   *               An array with all known whitespace characters as values and the type of whitespace as keys
7144
   *               as defined in above URL.
7145
   *               </p>
7146
   */
7147
  public static function whitespace_table()
  {
    // hand back the class-level lookup table unchanged
    $table = self::$whitespaceTable;

    return $table;
  }
7151
7152
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Suffix appended when the string was actually shortened.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $text = (string)$str;

    if (!isset($text[0])) {
      return '';
    }

    $limit = (int)$words;

    if ($limit < 1) {
      return '';
    }

    // leading whitespace plus up to $limit runs of "non-whitespace followed by whitespace"
    $found = array();
    preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $text, $found);

    // only shorten when something matched and the match does not span the whole input
    if (
        isset($found[0])
        &&
        self::strlen($text) !== self::strlen($found[0])
    ) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $text;
  }
7187
7188
  /**
   * Wraps a string to a given number of characters
   *
   * The string is split into characters via "UTF8::split()", a one-byte-per-character
   * mask is wrapped with the native "wordwrap()", and the break positions found in
   * the mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $text = (string)$str;
    $separator = (string)$break;

    if (!isset($text[0], $separator[0])) {
      return '';
    }

    // $glyphs holds the real characters (an existing break counts as one entry),
    // $mask mirrors it with one byte per entry: '#' = break, ' ' = space, '?' = anything else
    $mask = '';
    $glyphs = array();

    foreach (explode($separator, $text) as $index => $segment) {

      if ($index !== 0) {
        $glyphs[] = $separator;
        $mask .= '#';
      }

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;
        $mask .= ($glyph === ' ') ? ' ' : '?';
      }
    }

    // let the native function decide where the breaks go
    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $cursor = 0; // index of the next unconsumed entry in $glyphs
    $maskPos = -1; // position inside the mask that has been copied so far
    $hashPos = -1; // position of the current '#' inside the mask

    while (false !== ($hashPos = self::strpos($mask, '#', $hashPos + 1))) {

      // copy everything that sits in front of the break
      for (++$maskPos; $maskPos < $hashPos; ++$maskPos) {
        $wrapped .= $glyphs[$cursor];
        unset($glyphs[$cursor]);
        ++$cursor;
      }

      // the break replaces an original break or a space; with $cut it may also
      // fall inside a word, in which case no character is consumed
      if ($glyphs[$cursor] === $separator || $glyphs[$cursor] === ' ') {
        unset($glyphs[$cursor]);
        ++$cursor;
      }

      $wrapped .= $separator;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $glyphs);
  }
7255
7256
  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    // hand back the class-level whitespace list unchanged
    $whitespace = self::$whitespace;

    return $whitespace;
  }
7265
7266
}
7267