Completed
Push — master ( 5785c8...d32910 )
by Lars
16:54
created

UTF8::strip_whitespace()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 11
ccs 5
cts 5
cp 1
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 5
nc 2
nop 1
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
7
/**
8
 * UTF8-Helper-Class
9
 *
10
 * @package voku\helper
11
 */
12
final class UTF8
13
{
14
  /**
15
   * @var array
16
   */
17
  private static $WIN1252_TO_UTF8 = array(
18
      128 => "\xe2\x82\xac", // EURO SIGN
19
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
20
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
21
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
22
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
23
      134 => "\xe2\x80\xa0", // DAGGER
24
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
25
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
26
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
27
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
28
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
29
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
30
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
31
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
32
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
33
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
34
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
35
      149 => "\xe2\x80\xa2", // BULLET
36
      150 => "\xe2\x80\x93", // EN DASH
37
      151 => "\xe2\x80\x94", // EM DASH
38
      152 => "\xcb\x9c", // SMALL TILDE
39
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
40
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
41
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
42
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
43
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
44
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
45
  );
46
47
  /**
48
   * @var array
49
   */
50
  private static $CP1252_TO_UTF8 = array(
51
      '€' => '€',
52
      '‚' => '‚',
53
      'ƒ' => 'ƒ',
54
      '„' => '„',
55
      '…' => '…',
56
      '†' => '†',
57
      '‡' => '‡',
58
      'ˆ' => 'ˆ',
59
      '‰' => '‰',
60
      'Š' => 'Š',
61
      '‹' => '‹',
62
      'Œ' => 'Œ',
63
      'Ž' => 'Ž',
64
      '‘' => '‘',
65
      '’' => '’',
66
      '“' => '“',
67
      '”' => '”',
68
      '•' => '•',
69
      '–' => '–',
70
      '—' => '—',
71
      '˜' => '˜',
72
      '™' => '™',
73
      'š' => 'š',
74
      '›' => '›',
75
      'œ' => 'œ',
76
      'ž' => 'ž',
77
      'Ÿ' => 'Ÿ',
78
  );
79
80
  /**
81
   * Bom => Byte-Length
82
   *
83
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
84
   *
85
   * @var array
86
   */
87
  private static $BOM = array(
88
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
89
      'ï»¿'              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
90
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
91
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
92
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
93
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
94
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
95
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
96
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
97
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
98
  );
99
100
  /**
101
   * Numeric code point => UTF-8 Character
102
   *
103
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
104
   *
105
   * @var array
106
   */
107
  private static $WHITESPACE = array(
108
    // NUL Byte
109
    0     => "\x0",
110
    // Tab
111
    9     => "\x9",
112
    // New Line
113
    10    => "\xa",
114
    // Vertical Tab
115
    11    => "\xb",
116
    // Carriage Return
117
    13    => "\xd",
118
    // Ordinary Space
119
    32    => "\x20",
120
    // NO-BREAK SPACE
121
    160   => "\xc2\xa0",
122
    // OGHAM SPACE MARK
123
    5760  => "\xe1\x9a\x80",
124
    // MONGOLIAN VOWEL SEPARATOR
125
    6158  => "\xe1\xa0\x8e",
126
    // EN QUAD
127
    8192  => "\xe2\x80\x80",
128
    // EM QUAD
129
    8193  => "\xe2\x80\x81",
130
    // EN SPACE
131
    8194  => "\xe2\x80\x82",
132
    // EM SPACE
133
    8195  => "\xe2\x80\x83",
134
    // THREE-PER-EM SPACE
135
    8196  => "\xe2\x80\x84",
136
    // FOUR-PER-EM SPACE
137
    8197  => "\xe2\x80\x85",
138
    // SIX-PER-EM SPACE
139
    8198  => "\xe2\x80\x86",
140
    // FIGURE SPACE
141
    8199  => "\xe2\x80\x87",
142
    // PUNCTUATION SPACE
143
    8200  => "\xe2\x80\x88",
144
    // THIN SPACE
145
    8201  => "\xe2\x80\x89",
146
    //HAIR SPACE
147
    8202  => "\xe2\x80\x8a",
148
    // LINE SEPARATOR
149
    8232  => "\xe2\x80\xa8",
150
    // PARAGRAPH SEPARATOR
151
    8233  => "\xe2\x80\xa9",
152
    // NARROW NO-BREAK SPACE
153
    8239  => "\xe2\x80\xaf",
154
    // MEDIUM MATHEMATICAL SPACE
155
    8287  => "\xe2\x81\x9f",
156
    // IDEOGRAPHIC SPACE
157
    12288 => "\xe3\x80\x80",
158
  );
159
160
  /**
161
   * @var array
162
   */
163
  private static $WHITESPACE_TABLE = array(
164
      'SPACE'                     => "\x20",
165
      'NO-BREAK SPACE'            => "\xc2\xa0",
166
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
167
      'EN QUAD'                   => "\xe2\x80\x80",
168
      'EM QUAD'                   => "\xe2\x80\x81",
169
      'EN SPACE'                  => "\xe2\x80\x82",
170
      'EM SPACE'                  => "\xe2\x80\x83",
171
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
172
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
173
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
174
      'FIGURE SPACE'              => "\xe2\x80\x87",
175
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
176
      'THIN SPACE'                => "\xe2\x80\x89",
177
      'HAIR SPACE'                => "\xe2\x80\x8a",
178
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
179
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
180
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
181
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
182
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
183
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
184
  );
185
186
  /**
187
   * bidirectional text chars
188
   *
189
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
190
   *
191
   * @var array
192
   */
193
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
194
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
195
    8234 => "\xE2\x80\xAA",
196
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
197
    8235 => "\xE2\x80\xAB",
198
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
199
    8236 => "\xE2\x80\xAC",
200
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
201
    8237 => "\xE2\x80\xAD",
202
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
203
    8238 => "\xE2\x80\xAE",
204
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
205
    8294 => "\xE2\x81\xA6",
206
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
207
    8295 => "\xE2\x81\xA7",
208
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
209
    8296 => "\xE2\x81\xA8",
210
    // POP DIRECTIONAL ISOLATE
211
    8297 => "\xE2\x81\xA9",
212
  );
213
214
  /**
215
   * @var array
216
   */
217
  private static $COMMON_CASE_FOLD = array(
218
      'ſ'            => 's',
219
      "\xCD\x85"     => 'ι',
220
      'ς'            => 'σ',
221
      "\xCF\x90"     => 'β',
222
      "\xCF\x91"     => 'θ',
223
      "\xCF\x95"     => 'φ',
224
      "\xCF\x96"     => 'π',
225
      "\xCF\xB0"     => 'κ',
226
      "\xCF\xB1"     => 'ρ',
227
      "\xCF\xB5"     => 'ε',
228
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
229
      "\xE1\xBE\xBE" => 'ι',
230
  );
231
232
  /**
233
   * @var array
234
   */
235
  private static $BROKEN_UTF8_FIX = array(
236
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
237
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
238
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
239
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
240
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
241
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
242
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
243
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
244
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
245
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
246
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
247
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
248
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
249
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
250
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
251
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
252
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
253
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
254
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
255
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
256
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
257
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
258
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
259
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
260
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
261
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
262
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
263
      'ü'       => 'ü',
264
      'ä'       => 'ä',
265
      'ö'       => 'ö',
266
      'Ö'       => 'Ö',
267
      'ß'       => 'ß',
268
      'Ã '       => 'à',
269
      'á'       => 'á',
270
      'â'       => 'â',
271
      'ã'       => 'ã',
272
      'ù'       => 'ù',
273
      'ú'       => 'ú',
274
      'û'       => 'û',
275
      'Ù'       => 'Ù',
276
      'Ú'       => 'Ú',
277
      'Û'       => 'Û',
278
      'Ü'       => 'Ü',
279
      'ò'       => 'ò',
280
      'ó'       => 'ó',
281
      'ô'       => 'ô',
282
      'è'       => 'è',
283
      'é'       => 'é',
284
      'ê'       => 'ê',
285
      'ë'       => 'ë',
286
      'À'       => 'À',
287
      'Á'       => 'Á',
288
      'Â'       => 'Â',
289
      'Ã'       => 'Ã',
290
      'Ä'       => 'Ä',
291
      'Ã…'       => 'Å',
292
      'Ç'       => 'Ç',
293
      'È'       => 'È',
294
      'É'       => 'É',
295
      'Ê'       => 'Ê',
296
      'Ë'       => 'Ë',
297
      'ÃŒ'       => 'Ì',
298
      'Í'       => 'Í',
299
      'ÃŽ'       => 'Î',
300
      'Ï'       => 'Ï',
301
      'Ñ'       => 'Ñ',
302
      'Ã’'       => 'Ò',
303
      'Ó'       => 'Ó',
304
      'Ô'       => 'Ô',
305
      'Õ'       => 'Õ',
306
      'Ø'       => 'Ø',
307
      'Ã¥'       => 'å',
308
      'æ'       => 'æ',
309
      'ç'       => 'ç',
310
      'ì'       => 'ì',
311
      'í'       => 'í',
312
      'î'       => 'î',
313
      'ï'       => 'ï',
314
      'ð'       => 'ð',
315
      'ñ'       => 'ñ',
316
      'õ'       => 'õ',
317
      'ø'       => 'ø',
318
      'ý'       => 'ý',
319
      'ÿ'       => 'ÿ',
320
      '€'      => '€',
321
      '’'      => '’',
322
  );
323
324
  /**
325
   * @var array
326
   */
327
  private static $UTF8_TO_WIN1252 = array(
328
      "\xe2\x82\xac" => "\x80", // EURO SIGN
329
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
330
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
331
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
332
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
333
      "\xe2\x80\xa0" => "\x86", // DAGGER
334
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
335
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
336
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
337
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
338
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
339
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
340
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
341
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
342
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
343
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
344
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
345
      "\xe2\x80\xa2" => "\x95", // BULLET
346
      "\xe2\x80\x93" => "\x96", // EN DASH
347
      "\xe2\x80\x94" => "\x97", // EM DASH
348
      "\xcb\x9c"     => "\x98", // SMALL TILDE
349
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
350
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
351
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
352
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
353
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
354
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
355
  );
356
357
  /**
358
   * @var array
359
   */
360
  private static $UTF8_MSWORD = array(
361
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
362
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
363
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
364
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
365
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
366
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
367
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
368
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
369
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
370
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
371
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
372
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
373
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
374
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
375
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
376
  );
377
378
  /**
379
   * @var array
380
   */
381
  private static $ICONV_ENCODING = array(
382
      'ANSI_X3.4-1968',
383
      'ANSI_X3.4-1986',
384
      'ASCII',
385
      'CP367',
386
      'IBM367',
387
      'ISO-IR-6',
388
      'ISO646-US',
389
      'ISO_646.IRV:1991',
390
      'US',
391
      'US-ASCII',
392
      'CSASCII',
393
      'UTF-8',
394
      'ISO-10646-UCS-2',
395
      'UCS-2',
396
      'CSUNICODE',
397
      'UCS-2BE',
398
      'UNICODE-1-1',
399
      'UNICODEBIG',
400
      'CSUNICODE11',
401
      'UCS-2LE',
402
      'UNICODELITTLE',
403
      'ISO-10646-UCS-4',
404
      'UCS-4',
405
      'CSUCS4',
406
      'UCS-4BE',
407
      'UCS-4LE',
408
      'UTF-16',
409
      'UTF-16BE',
410
      'UTF-16LE',
411
      'UTF-32',
412
      'UTF-32BE',
413
      'UTF-32LE',
414
      'UNICODE-1-1-UTF-7',
415
      'UTF-7',
416
      'CSUNICODE11UTF7',
417
      'UCS-2-INTERNAL',
418
      'UCS-2-SWAPPED',
419
      'UCS-4-INTERNAL',
420
      'UCS-4-SWAPPED',
421
      'C99',
422
      'JAVA',
423
      'CP819',
424
      'IBM819',
425
      'ISO-8859-1',
426
      'ISO-IR-100',
427
      'ISO8859-1',
428
      'ISO_8859-1',
429
      'ISO_8859-1:1987',
430
      'L1',
431
      'LATIN1',
432
      'CSISOLATIN1',
433
      'ISO-8859-2',
434
      'ISO-IR-101',
435
      'ISO8859-2',
436
      'ISO_8859-2',
437
      'ISO_8859-2:1987',
438
      'L2',
439
      'LATIN2',
440
      'CSISOLATIN2',
441
      'ISO-8859-3',
442
      'ISO-IR-109',
443
      'ISO8859-3',
444
      'ISO_8859-3',
445
      'ISO_8859-3:1988',
446
      'L3',
447
      'LATIN3',
448
      'CSISOLATIN3',
449
      'ISO-8859-4',
450
      'ISO-IR-110',
451
      'ISO8859-4',
452
      'ISO_8859-4',
453
      'ISO_8859-4:1988',
454
      'L4',
455
      'LATIN4',
456
      'CSISOLATIN4',
457
      'CYRILLIC',
458
      'ISO-8859-5',
459
      'ISO-IR-144',
460
      'ISO8859-5',
461
      'ISO_8859-5',
462
      'ISO_8859-5:1988',
463
      'CSISOLATINCYRILLIC',
464
      'ARABIC',
465
      'ASMO-708',
466
      'ECMA-114',
467
      'ISO-8859-6',
468
      'ISO-IR-127',
469
      'ISO8859-6',
470
      'ISO_8859-6',
471
      'ISO_8859-6:1987',
472
      'CSISOLATINARABIC',
473
      'ECMA-118',
474
      'ELOT_928',
475
      'GREEK',
476
      'GREEK8',
477
      'ISO-8859-7',
478
      'ISO-IR-126',
479
      'ISO8859-7',
480
      'ISO_8859-7',
481
      'ISO_8859-7:1987',
482
      'ISO_8859-7:2003',
483
      'CSISOLATINGREEK',
484
      'HEBREW',
485
      'ISO-8859-8',
486
      'ISO-IR-138',
487
      'ISO8859-8',
488
      'ISO_8859-8',
489
      'ISO_8859-8:1988',
490
      'CSISOLATINHEBREW',
491
      'ISO-8859-9',
492
      'ISO-IR-148',
493
      'ISO8859-9',
494
      'ISO_8859-9',
495
      'ISO_8859-9:1989',
496
      'L5',
497
      'LATIN5',
498
      'CSISOLATIN5',
499
      'ISO-8859-10',
500
      'ISO-IR-157',
501
      'ISO8859-10',
502
      'ISO_8859-10',
503
      'ISO_8859-10:1992',
504
      'L6',
505
      'LATIN6',
506
      'CSISOLATIN6',
507
      'ISO-8859-11',
508
      'ISO8859-11',
509
      'ISO_8859-11',
510
      'ISO-8859-13',
511
      'ISO-IR-179',
512
      'ISO8859-13',
513
      'ISO_8859-13',
514
      'L7',
515
      'LATIN7',
516
      'ISO-8859-14',
517
      'ISO-CELTIC',
518
      'ISO-IR-199',
519
      'ISO8859-14',
520
      'ISO_8859-14',
521
      'ISO_8859-14:1998',
522
      'L8',
523
      'LATIN8',
524
      'ISO-8859-15',
525
      'ISO-IR-203',
526
      'ISO8859-15',
527
      'ISO_8859-15',
528
      'ISO_8859-15:1998',
529
      'LATIN-9',
530
      'ISO-8859-16',
531
      'ISO-IR-226',
532
      'ISO8859-16',
533
      'ISO_8859-16',
534
      'ISO_8859-16:2001',
535
      'L10',
536
      'LATIN10',
537
      'KOI8-R',
538
      'CSKOI8R',
539
      'KOI8-U',
540
      'KOI8-RU',
541
      'CP1250',
542
      'MS-EE',
543
      'WINDOWS-1250',
544
      'CP1251',
545
      'MS-CYRL',
546
      'WINDOWS-1251',
547
      'CP1252',
548
      'MS-ANSI',
549
      'WINDOWS-1252',
550
      'CP1253',
551
      'MS-GREEK',
552
      'WINDOWS-1253',
553
      'CP1254',
554
      'MS-TURK',
555
      'WINDOWS-1254',
556
      'CP1255',
557
      'MS-HEBR',
558
      'WINDOWS-1255',
559
      'CP1256',
560
      'MS-ARAB',
561
      'WINDOWS-1256',
562
      'CP1257',
563
      'WINBALTRIM',
564
      'WINDOWS-1257',
565
      'CP1258',
566
      'WINDOWS-1258',
567
      '850',
568
      'CP850',
569
      'IBM850',
570
      'CSPC850MULTILINGUAL',
571
      '862',
572
      'CP862',
573
      'IBM862',
574
      'CSPC862LATINHEBREW',
575
      '866',
576
      'CP866',
577
      'IBM866',
578
      'CSIBM866',
579
      'MAC',
580
      'MACINTOSH',
581
      'MACROMAN',
582
      'CSMACINTOSH',
583
      'MACCENTRALEUROPE',
584
      'MACICELAND',
585
      'MACCROATIAN',
586
      'MACROMANIA',
587
      'MACCYRILLIC',
588
      'MACUKRAINE',
589
      'MACGREEK',
590
      'MACTURKISH',
591
      'MACHEBREW',
592
      'MACARABIC',
593
      'MACTHAI',
594
      'HP-ROMAN8',
595
      'R8',
596
      'ROMAN8',
597
      'CSHPROMAN8',
598
      'NEXTSTEP',
599
      'ARMSCII-8',
600
      'GEORGIAN-ACADEMY',
601
      'GEORGIAN-PS',
602
      'KOI8-T',
603
      'CP154',
604
      'CYRILLIC-ASIAN',
605
      'PT154',
606
      'PTCP154',
607
      'CSPTCP154',
608
      'KZ-1048',
609
      'RK1048',
610
      'STRK1048-2002',
611
      'CSKZ1048',
612
      'MULELAO-1',
613
      'CP1133',
614
      'IBM-CP1133',
615
      'ISO-IR-166',
616
      'TIS-620',
617
      'TIS620',
618
      'TIS620-0',
619
      'TIS620.2529-1',
620
      'TIS620.2533-0',
621
      'TIS620.2533-1',
622
      'CP874',
623
      'WINDOWS-874',
624
      'VISCII',
625
      'VISCII1.1-1',
626
      'CSVISCII',
627
      'TCVN',
628
      'TCVN-5712',
629
      'TCVN5712-1',
630
      'TCVN5712-1:1993',
631
      'ISO-IR-14',
632
      'ISO646-JP',
633
      'JIS_C6220-1969-RO',
634
      'JP',
635
      'CSISO14JISC6220RO',
636
      'JISX0201-1976',
637
      'JIS_X0201',
638
      'X0201',
639
      'CSHALFWIDTHKATAKANA',
640
      'ISO-IR-87',
641
      'JIS0208',
642
      'JIS_C6226-1983',
643
      'JIS_X0208',
644
      'JIS_X0208-1983',
645
      'JIS_X0208-1990',
646
      'X0208',
647
      'CSISO87JISX0208',
648
      'ISO-IR-159',
649
      'JIS_X0212',
650
      'JIS_X0212-1990',
651
      'JIS_X0212.1990-0',
652
      'X0212',
653
      'CSISO159JISX02121990',
654
      'CN',
655
      'GB_1988-80',
656
      'ISO-IR-57',
657
      'ISO646-CN',
658
      'CSISO57GB1988',
659
      'CHINESE',
660
      'GB_2312-80',
661
      'ISO-IR-58',
662
      'CSISO58GB231280',
663
      'CN-GB-ISOIR165',
664
      'ISO-IR-165',
665
      'ISO-IR-149',
666
      'KOREAN',
667
      'KSC_5601',
668
      'KS_C_5601-1987',
669
      'KS_C_5601-1989',
670
      'CSKSC56011987',
671
      'EUC-JP',
672
      'EUCJP',
673
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
674
      'CSEUCPKDFMTJAPANESE',
675
      'MS_KANJI',
676
      'SHIFT-JIS',
677
      'SHIFT_JIS',
678
      'SJIS',
679
      'CSSHIFTJIS',
680
      'CP932',
681
      'ISO-2022-JP',
682
      'CSISO2022JP',
683
      'ISO-2022-JP-1',
684
      'ISO-2022-JP-2',
685
      'CSISO2022JP2',
686
      'CN-GB',
687
      'EUC-CN',
688
      'EUCCN',
689
      'GB2312',
690
      'CSGB2312',
691
      'GBK',
692
      'CP936',
693
      'MS936',
694
      'WINDOWS-936',
695
      'GB18030',
696
      'ISO-2022-CN',
697
      'CSISO2022CN',
698
      'ISO-2022-CN-EXT',
699
      'HZ',
700
      'HZ-GB-2312',
701
      'EUC-TW',
702
      'EUCTW',
703
      'CSEUCTW',
704
      'BIG-5',
705
      'BIG-FIVE',
706
      'BIG5',
707
      'BIGFIVE',
708
      'CN-BIG5',
709
      'CSBIG5',
710
      'CP950',
711
      'BIG5-HKSCS:1999',
712
      'BIG5-HKSCS:2001',
713
      'BIG5-HKSCS',
714
      'BIG5-HKSCS:2004',
715
      'BIG5HKSCS',
716
      'EUC-KR',
717
      'EUCKR',
718
      'CSEUCKR',
719
      'CP949',
720
      'UHC',
721
      'CP1361',
722
      'JOHAB',
723
      'ISO-2022-KR',
724
      'CSISO2022KR',
725
      'CP856',
726
      'CP922',
727
      'CP943',
728
      'CP1046',
729
      'CP1124',
730
      'CP1129',
731
      'CP1161',
732
      'IBM-1161',
733
      'IBM1161',
734
      'CSIBM1161',
735
      'CP1162',
736
      'IBM-1162',
737
      'IBM1162',
738
      'CSIBM1162',
739
      'CP1163',
740
      'IBM-1163',
741
      'IBM1163',
742
      'CSIBM1163',
743
      'DEC-KANJI',
744
      'DEC-HANYU',
745
      '437',
746
      'CP437',
747
      'IBM437',
748
      'CSPC8CODEPAGE437',
749
      'CP737',
750
      'CP775',
751
      'IBM775',
752
      'CSPC775BALTIC',
753
      '852',
754
      'CP852',
755
      'IBM852',
756
      'CSPCP852',
757
      'CP853',
758
      '855',
759
      'CP855',
760
      'IBM855',
761
      'CSIBM855',
762
      '857',
763
      'CP857',
764
      'IBM857',
765
      'CSIBM857',
766
      'CP858',
767
      '860',
768
      'CP860',
769
      'IBM860',
770
      'CSIBM860',
771
      '861',
772
      'CP-IS',
773
      'CP861',
774
      'IBM861',
775
      'CSIBM861',
776
      '863',
777
      'CP863',
778
      'IBM863',
779
      'CSIBM863',
780
      'CP864',
781
      'IBM864',
782
      'CSIBM864',
783
      '865',
784
      'CP865',
785
      'IBM865',
786
      'CSIBM865',
787
      '869',
788
      'CP-GR',
789
      'CP869',
790
      'IBM869',
791
      'CSIBM869',
792
      'CP1125',
793
      'EUC-JISX0213',
794
      'SHIFT_JISX0213',
795
      'ISO-2022-JP-3',
796
      'BIG5-2003',
797
      'ISO-IR-230',
798
      'TDS565',
799
      'ATARI',
800
      'ATARIST',
801
      'RISCOS-LATIN1',
802
  );
803
804
  /**
805
   * @var array
806
   */
807 1
  private static $SUPPORT = array();
808
809 1
  /**
   * Instantiating the class only triggers the environment detection
   * performed by UTF8::checkForSupport().
   */
  public function __construct()
  {
    UTF8::checkForSupport();
  }
816
817
  /**
   * Fetch a single character of a string by position: a multi-byte aware $str[1].
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string if the input
   *                is empty or the position is negative.</p>
   */
  public static function access($str, $pos)
  {
    $input = (string)$str;
    $offset = (int)$pos;

    // nothing can be returned for an empty string or a negative position
    if ($input === '' || $offset < 0) {
      return '';
    }

    return self::substr($input, $offset, 1);
  }
840 1
841
  /**
   * Make sure the string starts with the UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() already reports a BOM is returned untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // already has a BOM -> hand the input back unchanged
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
858
859
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it represents.
   *
   * The digits are converted octet by octet, so leading zero bits are kept and
   * inputs of any length are handled without loss of precision. If the number of
   * digits is not a multiple of 8, the input is left-padded with zero bits
   * (e.g. "1010" is read as "00001010"). Characters other than "0" and "1" are ignored.
   *
   * @param mixed $bin 1|0 <p>String of binary digits; any non-string or empty value yields an empty string.</p>
   *
   * @return string <p>The decoded bytes, or an empty string if there is nothing to decode.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!is_string($bin) || $bin === '') {
      return '';
    }

    // keep only real binary digits
    $bits = preg_replace('/[^01]/', '', $bin);
    if ($bits === null || $bits === '') {
      return '';
    }

    // left-pad to a whole number of octets, so the value of the digits is preserved
    $missing = (8 - strlen($bits) % 8) % 8;
    if ($missing > 0) {
      $bits = str_repeat('0', $missing) . $bits;
    }

    // convert every octet on its own: no float round-trip, no lost leading zeros
    $bytes = '';
    foreach (str_split($bits, 8) as $octet) {
      $bytes .= chr(bindec($octet));
    }

    return $bytes;
  }
874 1
875
  /**
   * Get the UTF-8 Byte Order Mark.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark (the three bytes EF BB BF)
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
886 2
887
  /**
   * @alias of UTF8::chr_map()
   *
   * Forwards both arguments unchanged to UTF8::chr_map() and returns its result.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
901
902
  /**
   * Detect which UTF-8 related extensions / features the server provides
   * and remember the result in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection already ran -> the stored results are reused
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // true only if the constant exists and the string-overload bit is set in the ini value
    self::$SUPPORT['mbstring_func_overload'] = (
        defined('MB_OVERLOAD_STRING')
        &&
        (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
939 7
940 7
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * If "\IntlChar" is available and the encoding is exactly 'UTF-8', the call is
   * delegated to \IntlChar::chr(). Otherwise the bytes are built by hand; in that
   * path the code point must be a real integer and must not be negative.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } elseif (self::$SUPPORT['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    $i = (int)$code_point;
    if ($i !== $code_point) {
      return null;
    }

    // a negative value is not a code point; without this guard the native chr()
    // would turn it into a single invalid byte (e.g. -1 => "\xff")
    if ($code_point < 0) {
      return null;
    }

    // use static cache, only if there is no support for "\IntlChar"
    static $CHAR_CACHE = array();
    // the separator keeps code point and encoding apart, even for numeric encoding names
    $cacheKey = $code_point . '|' . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // wrap the value into the 21 bit range that the 4-byte form can carry
    $code_point %= 0x200000;

    if ($code_point < 0x80) {
      // 1 byte
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point < 0x800) {
      // 2 bytes
      $str = self::chr_and_parse_int(0xC0 | ($code_point >> 6)) .
             self::chr_and_parse_int(0x80 | ($code_point & 0x3F));
    } elseif ($code_point < 0x10000) {
      // 3 bytes
      $str = self::chr_and_parse_int(0xE0 | ($code_point >> 12)) .
             self::chr_and_parse_int(0x80 | (($code_point >> 6) & 0x3F)) .
             self::chr_and_parse_int(0x80 | ($code_point & 0x3F));
    } else {
      // 4 bytes
      $str = self::chr_and_parse_int(0xF0 | ($code_point >> 18)) .
             self::chr_and_parse_int(0x80 | (($code_point >> 12) & 0x3F)) .
             self::chr_and_parse_int(0x80 | (($code_point >> 6) & 0x3F)) .
             self::chr_and_parse_int(0x80 | ($code_point & 0x3F));
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1000 2
1001 2
  /**
1002 2
   * @param int $int
1003
   *
1004 2
   * @return string
1005
   */
1006 2
  private static function chr_and_parse_int($int)
  {
    // Force an integer first, then hand it to the native single-byte chr().
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1010
1011 2
  /**
1012 2
   * Applies callback to all characters of a string.
1013 2
   *
1014
   * @param string|array $callback <p>The callback function.</p>
1015 1
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
1016 1
   *
1017 1
   * @return array <p>The outcome of callback.</p>
1018
   */
1019
  public static function chr_map($callback, $str)
  {
    // Split into characters first, then feed every character to the callback.
    return array_map($callback, self::split($str));
  }
1025 2
1026 2
  /**
1027
   * Generates an array of byte length of each character of a Unicode string.
1028 2
   *
1029
   * 1 byte => U+0000  - U+007F
1030
   * 2 byte => U+0080  - U+07FF
1031
   * 3 byte => U+0800  - U+FFFF
1032
   * 4 byte => U+10000 - U+10FFFF
1033
   *
1034
   * @param string $str <p>The original Unicode string.</p>
1035
   *
1036
   * @return array <p>An array of byte lengths of each character.</p>
1037
   */
1038
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // Nothing to measure for an empty string.
    if (isset($str[0]) === false) {
      return array();
    }

    // Byte length of a single character ('8BIT' makes strlen count raw bytes).
    $byteLength = function ($character) {
      return UTF8::strlen($character, '8BIT');
    };

    $characters = self::split($str);

    return array_map($byteLength, $characters);
  }
1053 1
1054
  /**
1055 1
   * Get a decimal code representation of a specific character.
1056
   *
1057
   * @param string $char <p>The input character.</p>
1058
   *
1059
   * @return int
1060
   */
1061
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // Empty input: there is no lead byte to decode. Reading $char[0] here used to
    // raise an "uninitialized string offset" notice/warning.
    // NOTE(review): 0 is returned on the assumption that self::ord('') also yields 0 — confirm.
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx: single-byte character, the byte value is the code point
      return $code;
    }

    // Determine the sequence length from the lead byte and strip its marker bits.
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx: append the 6 payload bits of each continuation byte.
      // A truncated sequence contributes zero bits instead of reading a missing offset.
      $payload = isset($char[$i - 1]) ? (self::ord($char[$i - 1]) & ~0x80) : 0;
      $code = ($code << 6) + $payload;
    }

    return $code;
  }
1093 17
1094 17
  /**
1095
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
1096 44
   *
1097 12
   * @param string $char <p>The input character</p>
1098 12
   * @param string $pfix [optional]
1099
   *
1100 44
   * @return string <p>The code point encoded as U+xxxx</p>
1101 5
   */
1102 5
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if (isset($char[0]) === false) {
      return '';
    }

    // The literal HTML entity for NUL is handled as an empty character.
    $input = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
1116 4
1117
  /**
1118 4
   * alias for "UTF8::chr_to_decimal()"
1119 1
   *
1120
   * @see UTF8::chr_to_decimal()
1121
   *
1122
   * @param string $chr
1123 4
   *
1124
   * @return int
1125
   */
1126
  public static function chr_to_int($chr)
  {
    // Pure alias: all of the work happens in chr_to_decimal().
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1130 4
1131
  /**
1132 4
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
1133
   *
1134
   * @param string $body     <p>The original string to be split.</p>
1135
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
1136
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
1137
   *
1138
   * @return string <p>The chunked string</p>
1139
   */
1140
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    // Cut the string into pieces of $chunklen, then glue them together with $end.
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1144
1145
  /**
1146 5
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
1147
   *
1148 5
   * @param string $str                     <p>The string to be sanitized.</p>
1149 5
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
1150 5
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
1151
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
1152 5
   *                                        => "..."</p>
1153
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
1154 5
   *                                        $normalize_whitespace</p>
1155 5
   *
1156 5
   * @return string <p>Clean UTF-8 encoded string.</p>
1157
   */
1158 5
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // Based on http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // (that version caused connection reset problems on larger strings).
    // Group 1 captures runs of well-formed sequences; stray bytes land in groups 2 / 3
    // and are dropped because only "$1" is kept in the replacement.
    $validSequences = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($validSequences, '$1', $str);

    // Always applied: drop the replacement character, then invisible characters.
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    // Optional steps, applied in this fixed order.
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1193
1194 1
  /**
1195
   * Cleans up a string: fixes simple UTF-8 encoding errors and keeps only printable UTF-8 chars.
1196 1
   *
1197 1
   * @param string $str <p>The input string.</p>
1198 1
   *
1199
   * @return string
1200 1
   */
1201
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Step 1: repair ISO <-> UTF-8 mix-ups.
    $fixed = self::fix_simple_utf8($str);

    // Step 2: strip non-UTF-8 bytes, the diamond question mark and invisible
    // characters (e.g. "\0"), remove the BOM and normalize whitespace while
    // keeping non-breaking spaces.
    $cleaned = self::clean($fixed, true, true, false, true);

    return (string)$cleaned;
  }
1221 11
1222 5
  /**
1223
   * Accepts a string or a array of strings and returns an array of Unicode code points.
1224
   *
1225 11
   * INFO: opposite to UTF8::string()
1226 1
   *
1227 1
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
1228
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
1229 11
   *                                    default, code points will be returned as integers.</p>
1230
   *
1231
   * @return array <p>The array of code points.</p>
1232
   */
1233 11
  public static function codepoints($arg, $u_style = false)
  {
    // A plain string is split into characters; anything else is mapped as given.
    $characters = (is_string($arg) === true) ? self::split($arg) : $arg;

    $toCodePoint = array(
        '\\voku\\helper\\UTF8',
        'ord',
    );
    $codePoints = array_map($toCodePoint, $characters);

    if (!$u_style) {
      return $codePoints;
    }

    // U+xxxx style requested: convert every integer code point to its hex notation.
    $toHex = array(
        '\\voku\\helper\\UTF8',
        'int_to_hex',
    );

    return array_map($toHex, $codePoints);
  }
1259 1
1260
  /**
1261
   * Returns count of characters used in a string.
1262 2
   *
1263 1
   * @param string $str       <p>The input string.</p>
1264
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
1265
   *
1266 2
   * @return array <p>An associative array of Character as keys and
1267 2
   *               their count as values.</p>
1268 2
   */
1269
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // One array entry per character, then tally how often each one occurs.
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1273 2
1274
  /**
1275
   * Converts a int-value into an UTF-8 character.
1276
   *
1277 1
   * @param mixed $int
1278
   *
1279
   * @return string
1280
   */
1281
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4.
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    // Build the numeric character reference and let the entity decoder resolve it.
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1291
1292
  /**
1293
   * Encode a string with a new charset-encoding.
1294
   *
1295
   * INFO:  Unlike "UTF8::utf8_encode()", this function also tries to fix broken / double encoding,
   *        so you can safely call it on a string that is already UTF-8 without corrupting it.
1297
   *
1298
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
1299
   * @param string $str      <p>The input string</p>
1300
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
1301
   *                         /> otherwise we auto-detect the current string-encoding</p>
1302
   *
1303
   * @return string
1304
   */
1305
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // Nothing to do without input or without a target encoding.
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Detection failed: checked explicitly instead of relying on loose truthiness
    // of a false|string value.
    if (is_string($encodingDetected) === false || $encodingDetected === '') {
      return $str;
    }

    // Without $force, a string that is already in the target encoding is left alone.
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || $encodingDetected === 'UTF-8'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || $encodingDetected === 'ISO-8859-1'
            || $encodingDetected === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // Accept any non-empty string result. A plain truthiness test would wrongly
    // reject the valid result "0" and return the unconverted input.
    if (is_string($strEncoded) === true && $strEncoded !== '') {
      return $strEncoded;
    }

    // Conversion failed: fall back to the original string.
    return $str;
  }
1382 2
1383
  /**
1384
   * Reads entire file into a string.
1385
   *
1386 2
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1387 1
   *
1388
   * @link http://php.net/manual/en/function.file-get-contents.php
1389
   *
1390 1
   * @param string        $filename      <p>
1391 1
   *                                     Name of the file to read.
1392 1
   *                                     </p>
1393 1
   * @param int|false     $flags         [optional] <p>
1394
   *                                     Prior to PHP 6, this parameter is called
1395 1
   *                                     use_include_path and is a bool.
1396
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1397
   *                                     to trigger include path
1398
   *                                     search.
1399
   *                                     </p>
1400
   *                                     <p>
1401
   *                                     The value of flags can be any combination of
1402
   *                                     the following flags (with some restrictions), joined with the
1403
   *                                     binary OR (|)
1404
   *                                     operator.
1405 1
   *                                     </p>
1406
   *                                     <p>
1407 1
   *                                     <table>
1408
   *                                     Available flags
1409
   *                                     <tr valign="top">
1410
   *                                     <td>Flag</td>
1411
   *                                     <td>Description</td>
1412
   *                                     </tr>
1413
   *                                     <tr valign="top">
1414
   *                                     <td>
1415
   *                                     FILE_USE_INCLUDE_PATH
1416
   *                                     </td>
1417
   *                                     <td>
1418
   *                                     Search for filename in the include directory.
1419 9
   *                                     See include_path for more
1420
   *                                     information.
1421 9
   *                                     </td>
1422 9
   *                                     </tr>
1423 3
   *                                     <tr valign="top">
1424
   *                                     <td>
1425 3
   *                                     FILE_TEXT
1426 3
   *                                     </td>
1427 3
   *                                     <td>
1428 9
   *                                     As of PHP 6, the default encoding of the read
1429 2
   *                                     data is UTF-8. You can specify a different encoding by creating a
1430 2
   *                                     custom context or by changing the default using
1431 2
   *                                     stream_default_encoding. This flag cannot be
1432 2
   *                                     used with FILE_BINARY.
1433 9
   *                                     </td>
1434
   *                                     </tr>
1435 8
   *                                     <tr valign="top">
1436
   *                                     <td>
1437 2
   *                                     FILE_BINARY
1438 2
   *                                     </td>
1439
   *                                     <td>
1440 8
   *                                     With this flag, the file is read in binary mode. This is the default
1441
   *                                     setting and cannot be used with FILE_TEXT.
1442 8
   *                                     </td>
1443 6
   *                                     </tr>
1444 6
   *                                     </table>
1445 6
   *                                     </p>
1446
   * @param resource|null $context       [optional] <p>
1447 6
   *                                     A valid context resource created with
1448 3
   *                                     stream_context_create. If you don't need to use a
1449 3
   *                                     custom context, you can skip this parameter by &null;.
1450 5
   *                                     </p>
1451
   * @param int|null      $offset        [optional] <p>
1452
   *                                     The offset where the reading starts.
1453
   *                                     </p>
1454
   * @param int|null      $maxlen        [optional] <p>
1455 8
   *                                     Maximum length of data read. The default is to read until end
1456 8
   *                                     of file is reached.
1457 5
   *                                     </p>
1458 8
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1459
   *
1460
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1461 2
   *                                     or pdf, because they used non default utf-8 chars</p>
1462 2
   *
1463 8
   * @return string|false <p>The function returns the read data or false on failure.</p>
1464 8
   */
1465 9
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // Normalize the inputs.
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 — confirm the supported PHP versions.
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // Build a default HTTP context carrying the timeout, unless the caller supplied one.
    if ($timeout && $context === null) {
      $httpOptions = array(
          'timeout' => $timeout,
      );
      $context = stream_context_create(
          array(
              'http' => $httpOptions,
          )
      );
    }

    // Any falsy flag value becomes an explicit false; a missing offset starts at 0.
    $flags = $flags ? $flags : false;
    $offset = ($offset === null) ? 0 : $offset;

    // The length argument is only passed on when it is a real integer.
    $data = (is_int($maxlen) === true)
        ? file_get_contents($filename, $flags, $context, $offset, $maxlen)
        : file_get_contents($filename, $flags, $context, $offset);

    // Propagate a read failure unchanged.
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // Convert to UTF-8 (auto-detecting the source encoding), then clean up the result.
    return self::cleanup(self::encode('UTF-8', $data, false));
  }
1508
1509
  /**
1510
   * Checks if a file starts with BOM (Byte Order Mark) character.
1511
   *
1512
   * @param string $file_path <p>Path to a valid file.</p>
1513
   *
1514
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1515
   */
1516
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // An unreadable file cannot start with a BOM; do not pass the boolean
    // failure value on to the string check.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1520 1
1521
  /**
1522 1
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1523 1
   *
1524 1
   * @param mixed  $var
1525 1
   * @param int    $normalization_form
1526
   * @param string $leading_combining
1527
   *
1528 1
   * @return mixed
1529
   */
1530
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    // Arrays: filter every element recursively.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively, in place.
    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Every other non-string type passes through untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: the string was already normalized.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // Fall back to re-encoding when normalization yields nothing usable.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars for NFC-safe concatenations.
    $needsLeadingChar = $var[0] >= "\x80"
                        && isset($normalized[0], $leading_combining[0])
                        && preg_match('/^\p{Mn}/u', $var);

    if ($needsLeadingChar) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
1584 7
1585 2
  /**
1586
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1587
   *
1588 7
   * Gets a specific external variable by name and optionally filters it
1589 1
   *
1590 1
   * @link  http://php.net/manual/en/function.filter-input.php
1591 1
   *
1592
   * @param int    $type          <p>
1593 7
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1594
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1595
   *                              <b>INPUT_ENV</b>.
1596
   *                              </p>
1597
   * @param string $variable_name <p>
1598
   *                              Name of a variable to get.
1599
   *                              </p>
1600
   * @param int    $filter        [optional] <p>
1601
   *                              The ID of the filter to apply. The
1602
   *                              manual page lists the available filters.
1603 1
   *                              </p>
1604
   * @param mixed  $options       [optional] <p>
1605 1
   *                              Associative array of options or bitwise disjunction of flags. If filter
1606
   *                              accepts options, flags can be provided in "flags" field of array.
1607 1
   *                              </p>
1608
   *
1609
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1610 1
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1611 1
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1612
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1613 1
   * @since 5.2.0
1614
   */
1615 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller actually supplied it.
    $optionsGiven = func_num_args() >= 4;

    $var = $optionsGiven
        ? filter_input($type, $variable_name, $filter, $options)
        : filter_input($type, $variable_name, $filter);

    return self::filter($var);
  }
1625
1626
  /**
1627
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1628
   *
1629
   * Gets external variables and optionally filters them
1630
   *
1631
   * @link  http://php.net/manual/en/function.filter-input-array.php
1632 1
   *
1633
   * @param int   $type       <p>
1634 1
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1635
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1636
   *                          <b>INPUT_ENV</b>.
1637
   *                          </p>
1638 1
   * @param mixed $definition [optional] <p>
1639
   *                          An array defining the arguments. A valid key is a string
1640
   *                          containing a variable name and a valid value is either a filter type, or an array
1641
   *                          optionally specifying the filter, flags and options. If the value is an
1642
   *                          array, valid keys are filter which specifies the
1643
   *                          filter type,
1644
   *                          flags which specifies any flags that apply to the
1645
   *                          filter, and options which specifies any options that
1646
   *                          apply to the filter. See the example below for a better understanding.
1647
   *                          </p>
1648
   *                          <p>
1649
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1650
   *                          input array are filtered by this filter.
1651
   *                          </p>
1652
   * @param bool  $add_empty  [optional] <p>
1653
   *                          Add missing keys as <b>NULL</b> to the return value.
1654 1
   *                          </p>
1655
   *
1656 1
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1657 1
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1658
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1659
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1660 1
   * fails.
1661
   * @since 5.2.0
1662 1
   */
1663 1 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With only $type given, call the native function with its own defaults.
    $definitionGiven = func_num_args() >= 2;

    $values = $definitionGiven
        ? filter_input_array($type, $definition, $add_empty)
        : filter_input_array($type);

    return self::filter($values);
  }
1673
1674
  /**
1675
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1676
   *
1677
   * Filters a variable with a specified filter
1678
   *
1679
   * @link  http://php.net/manual/en/function.filter-var.php
1680
   *
1681
   * @param mixed $variable <p>
1682
   *                        Value to filter.
1683
   *                        </p>
1684
   * @param int   $filter   [optional] <p>
1685
   *                        The ID of the filter to apply. The
1686
   *                        manual page lists the available filters.
1687
   *                        </p>
1688
   * @param mixed $options  [optional] <p>
1689
   *                        Associative array of options or bitwise disjunction of flags. If filter
1690
   *                        accepts options, flags can be provided in "flags" field of array. For
1691
   *                        the "callback" filter, callable type should be passed. The
1692 1
   *                        callback must accept one argument, the value to be filtered, and return
1693 1
   *                        the value after filtering/sanitizing it.
1694
   *                        </p>
1695
   *                        <p>
1696
   *                        <code>
1697
   *                        // for filters that accept options, use this format
1698
   *                        $options = array(
1699
   *                        'options' => array(
1700
   *                        'default' => 3, // value to return if the filter fails
1701
   *                        // other options here
1702
   *                        'min_range' => 0
1703
   *                        ),
1704
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1705
   *                        );
1706
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1707
   *                        // for filter that only accept flags, you can pass them directly
1708
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1709
   *                        // for filter that only accept flags, you can also pass as an array
1710
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1711
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1712
   *                        // callback validate filter
1713
   *                        function foo($value)
1714
   *                        {
1715
   *                        // Expected format: Surname, GivenNames
1716
   *                        if (strpos($value, ", ") === false) return false;
1717
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1718
   *                        $empty = (empty($surname) || empty($givennames));
1719
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1720
   *                        if ($empty || $notstrings) {
1721
   *                        return false;
1722
   *                        } else {
1723
   *                        return $value;
1724
   *                        }
1725
   *                        }
1726
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1727
   *                        </code>
1728
   *                        </p>
1729
   *
1730
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1731
   * @since 5.2.0
1732
   */
1733 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Only forward $options to the native filter_var() when the caller
    // actually supplied a third argument; otherwise let PHP use its own default.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // Post-process the native result through self::filter().
    return self::filter($filtered);
  }
1743
1744
  /**
1745
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1746
   *
1747
   * Gets multiple variables and optionally filters them
1748
   *
1749
   * @link  http://php.net/manual/en/function.filter-var-array.php
1750
   *
1751
   * @param array $data       <p>
1752 1
   *                          An array with string keys containing the data to filter.
1753
   *                          </p>
1754 1
   * @param mixed $definition [optional] <p>
1755 1
   *                          An array defining the arguments. A valid key is a string
1756
   *                          containing a variable name and a valid value is either a
1757 1
   *                          filter type, or an
1758
   *                          array optionally specifying the filter, flags and options.
1759
   *                          If the value is an array, valid keys are filter
1760
   *                          which specifies the filter type,
1761
   *                          flags which specifies any flags that apply to the
1762
   *                          filter, and options which specifies any options that
1763
   *                          apply to the filter. See the example below for a better understanding.
1764
   *                          </p>
1765
   *                          <p>
1766
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1767
   *                          input array are filtered by this filter.
1768
   *                          </p>
1769
   * @param bool  $add_empty  [optional] <p>
1770
   *                          Add missing keys as <b>NULL</b> to the return value.
1771
   *                          </p>
1772 1
   *
1773
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1774 1
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1775
   * the variable is not set.
1776
   * @since 5.2.0
1777
   */
1778 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument, call the native function without a definition so
    // that PHP applies its own defaults; otherwise forward everything.
    $result = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // Post-process the native result through self::filter().
    return self::filter($result);
  }
1788 1
1789 1
  /**
   * Check whether a string has at most the given number of characters.
   *
   * The length is measured with self::strlen(), not with the native byte-based strlen().
   *
   * @param string $str      The string to measure.
   * @param int    $box_size The maximum number of characters allowed.
   *
   * @return bool true if the length of $str is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1801
1802
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Intended for UTF-8 strings that were converted from Windows-1252 as if they were ISO-8859-1
   * (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * Every key of self::$BROKEN_UTF8_FIX found in the input is replaced by its mapped value.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with the known broken sequences replaced; '' for empty input.</p>
   */
  public static function fix_simple_utf8($str)
  {
    // search / replace lists, built once per request from self::$BROKEN_UTF8_FIX
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1834 1
1835
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively, keys are kept). For a single value,
   * "decode, then re-encode" is repeated until the result no longer changes.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return mixed <p>The fixed string, or an array of fixed values when an array was passed.</p>
   */
  public static function fix_utf8($str)
  {
    if (is_array($str) === true) {
      // array_map() with a single array keeps the original keys
      return array_map(
          function ($value) {
            return UTF8::fix_utf8($value);
          },
          $str
      );
    }

    $previous = '';
    while ($previous !== $str) {
      $previous = $str;

      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1866
1867
  /**
   * Get the text direction of a single character.
   *
   * When the "intlChar" support flag is set, \IntlChar::charDirection() is asked first; if its
   * answer is in neither the LTR nor the RTL code list, the built-in code point tables are used.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      // numeric direction codes, from "IntlChar"-Class
      $ltrCodes = array(0, 11, 12, 20);
      $rtlCodes = array(1, 13, 14, 15, 21);

      $direction = \IntlChar::charDirection($char);

      if (in_array($direction, $ltrCodes, true)) {
        return 'LTR';
      }

      if (in_array($direction, $rtlCodes, true)) {
        return 'RTL';
      }

      // any other code: fall through to the code point tables below
    }

    $c = static::chr_to_decimal($char);

    // everything outside of the overall RTL window is LTR
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Table entries: a plain integer is a single code point (strict comparison),
    // an array(first, last) is an inclusive code point range.
    if (0x85e >= $c) {

      $rtlTable = array(
          0x5be,
          0x5c0,
          0x5c3,
          0x5c6,
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          0x608,
          0x60b,
          0x60d,
          0x61b,
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          0x710,
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          0x7b1,
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          0x7fa,
          array(0x800, 0x815),
          0x81a,
          0x824,
          0x828,
          array(0x830, 0x83e),
          array(0x840, 0x858),
          0x85e,
      );

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      $rtlTable = array(
          0xfb1d,
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          0xfb3e,
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          0x10808,
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          0x1083c,
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          0x1093f,
          0x10a00,
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two tables and not U+200F
      return 'LTR';

    }

    foreach ($rtlTable as $entry) {
      if (is_array($entry)) {
        if ($entry[0] <= $c && $entry[1] >= $c) {
          return 'RTL';
        }
      } elseif ($entry === $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
1986 9
1987
  /**
   * Load data from "/data/<file>.php" (relative to this class file).
   *
   * The data file is pulled in via "require", so whatever that file returns is handed back.
   *
   * @param string $file <p>Base name of the data file, without directory and without ".php".</p>
   *
   * @return bool|string|array|int <p>Will return false if the file does not exist.</p>
   */
  private static function getData($file)
  {
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2004
2005
  /**
   * Check for php-support.
   *
   * Runs self::checkForSupport() first if the support information was not collected yet.
   *
   * @param string|null $key <p>Name of a single support entry, or null for all entries.</p>
   *
   * @return array|bool|null return the full support-array, if $key === null<br />
   *                         return the stored value, if $key is used and available<br />
   *                         otherwise return null
   */
  public static function getSupportInfo($key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2030
2031
  /**
   * Alias for "UTF8::string_has_bom()".
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool <p>The unchanged result of UTF8::string_has_bom().</p>
   *
   * @deprecated
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2046
2047
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with hexdec() and the result is handed to UTF8::decimal_to_chr().
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr($hexdec)
  {
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2058
2059
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+"
   * (case-insensitive), e.g. "00f1", "\u00f1" or "U+00f1". Anything else, including input
   * that contains non-hexadecimal letters, yields false.
   *
   * @param string $hexdec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int($hexdec)
  {
    $hexdec = (string)$hexdec;

    if (!isset($hexdec[0])) {
      return false;
    }

    // Only real hex digits are allowed: letters beyond "f" would otherwise
    // pass the check and be turned into 0 by intval().
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexdec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2082
2083
  /**
   * Alias for "UTF8::html_entity_decode()".
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string <p>The unchanged result of UTF8::html_entity_decode().</p>
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2098 1
2099
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * Uses mb_encode_numericentity() when it is available; otherwise every character is
   * encoded on its own via UTF8::single_chr_html_encode().
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // with $keepAsciiChars only code points from 0x80 upwards are converted
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A convmap consists of groups of exactly four integers
      // (start, end, offset, mask); a trailing fifth element is rejected by PHP >= 8.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // fallback without mbstring: encode char by char
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2148
2149
  /**
2150
   * UTF-8 version of html_entity_decode()
2151
   *
2152
   * The reason we are not using html_entity_decode() by itself is because
2153
   * while it is not technically correct to leave out the semicolon
2154
   * at the end of an entity most browsers will still interpret the entity
2155
   * correctly. html_entity_decode() does not convert entities without
2156
   * semicolons, so we are left with our own little solution here. Bummer.
2157
   *
2158
   * Convert all HTML entities to their applicable characters
2159
   *
2160
   * INFO: opposite to UTF8::html_encode()
2161
   *
2162
   * @link http://php.net/manual/en/function.html-entity-decode.php
2163
   *
2164
   * @param string $str      <p>
2165
   *                         The input string.
2166
   *                         </p>
2167
   * @param int    $flags    [optional] <p>
2168
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2169
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2170
   *                         <table>
2171
   *                         Available <i>flags</i> constants
2172
   *                         <tr valign="top">
2173
   *                         <td>Constant Name</td>
2174
   *                         <td>Description</td>
2175
   *                         </tr>
2176
   *                         <tr valign="top">
2177
   *                         <td><b>ENT_COMPAT</b></td>
2178
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2179
   *                         </tr>
2180
   *                         <tr valign="top">
2181
   *                         <td><b>ENT_QUOTES</b></td>
2182
   *                         <td>Will convert both double and single quotes.</td>
2183
   *                         </tr>
2184
   *                         <tr valign="top">
2185
   *                         <td><b>ENT_NOQUOTES</b></td>
2186
   *                         <td>Will leave both double and single quotes unconverted.</td>
2187
   *                         </tr>
2188
   *                         <tr valign="top">
2189
   *                         <td><b>ENT_HTML401</b></td>
2190
   *                         <td>
2191
   *                         Handle code as HTML 4.01.
2192
   *                         </td>
2193
   *                         </tr>
2194
   *                         <tr valign="top">
2195
   *                         <td><b>ENT_XML1</b></td>
2196
   *                         <td>
2197
   *                         Handle code as XML 1.
2198
   *                         </td>
2199
   *                         </tr>
2200
   *                         <tr valign="top">
2201
   *                         <td><b>ENT_XHTML</b></td>
2202
   *                         <td>
2203
   *                         Handle code as XHTML.
2204
   *                         </td>
2205
   *                         </tr>
2206
   *                         <tr valign="top">
2207
   *                         <td><b>ENT_HTML5</b></td>
2208
   *                         <td>
2209
   *                         Handle code as HTML 5.
2210
   *                         </td>
2211
   *                         </tr>
2212
   *                         </table>
2213
   *                         </p>
2214
   * @param string $encoding [optional] <p>Encoding to use.</p>
2215
   *
2216
   * @return string <p>The decoded string.</p>
2217
   */
2218
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fewer than four bytes, examples: &; || &x;
    if (!isset($str[3])) {
      return $str;
    }

    // An entity needs an "&" plus either a "#" right behind it or a ";" somewhere.
    $mayContainEntity = strpos($str, '&') !== false
                        &&
                        (
                            strpos($str, '&#') !== false
                            ||
                            strpos($str, ';') !== false
                        );
    if (!$mayContainEntity) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    // Decodes one decimal entity, but leaves entities alone that would turn into a quote char.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
      $isQuote = ($decoded === '"' || $decoded === "'");

      return $isQuote ? $matches[0] : $decoded;
    };

    // Repeat until a pass no longer changes the string, so that
    // multi-encoded entities are decoded completely.
    do {
      $previous = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // add the missing semicolon to numeric entities, then
      // decode numeric & UTF16 two byte entities
      $withSemicolons = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2283
2284
  /**
2285
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2286
   *
2287
   * @link http://php.net/manual/en/function.htmlentities.php
2288
   *
2289 2
   * @param string $str           <p>
2290
   *                              The input string.
2291 2
   *                              </p>
2292
   * @param int    $flags         [optional] <p>
2293
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2294
   *                              invalid code unit sequences and the used document type. The default is
2295
   *                              ENT_COMPAT | ENT_HTML401.
2296
   *                              <table>
2297
   *                              Available <i>flags</i> constants
2298
   *                              <tr valign="top">
2299
   *                              <td>Constant Name</td>
2300
   *                              <td>Description</td>
2301
   *                              </tr>
2302
   *                              <tr valign="top">
2303 2
   *                              <td><b>ENT_COMPAT</b></td>
2304
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2305 2
   *                              </tr>
2306
   *                              <tr valign="top">
2307
   *                              <td><b>ENT_QUOTES</b></td>
2308
   *                              <td>Will convert both double and single quotes.</td>
2309
   *                              </tr>
2310
   *                              <tr valign="top">
2311
   *                              <td><b>ENT_NOQUOTES</b></td>
2312
   *                              <td>Will leave both double and single quotes unconverted.</td>
2313
   *                              </tr>
2314
   *                              <tr valign="top">
2315
   *                              <td><b>ENT_IGNORE</b></td>
2316
   *                              <td>
2317 1
   *                              Silently discard invalid code unit sequences instead of returning
2318
   *                              an empty string. Using this flag is discouraged as it
2319 1
   *                              may have security implications.
2320
   *                              </td>
2321
   *                              </tr>
2322
   *                              <tr valign="top">
2323
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2324
   *                              <td>
2325
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2326
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2327
   *                              </td>
2328
   *                              </tr>
2329
   *                              <tr valign="top">
2330
   *                              <td><b>ENT_DISALLOWED</b></td>
2331
   *                              <td>
2332
   *                              Replace invalid code points for the given document type with a
2333
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2334
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2335
   *                              instance, to ensure the well-formedness of XML documents with
2336
   *                              embedded external content.
2337
   *                              </td>
2338
   *                              </tr>
2339
   *                              <tr valign="top">
2340
   *                              <td><b>ENT_HTML401</b></td>
2341
   *                              <td>
2342
   *                              Handle code as HTML 4.01.
2343
   *                              </td>
2344
   *                              </tr>
2345
   *                              <tr valign="top">
2346
   *                              <td><b>ENT_XML1</b></td>
2347
   *                              <td>
2348
   *                              Handle code as XML 1.
2349
   *                              </td>
2350
   *                              </tr>
2351
   *                              <tr valign="top">
2352
   *                              <td><b>ENT_XHTML</b></td>
2353
   *                              <td>
2354
   *                              Handle code as XHTML.
2355
   *                              </td>
2356
   *                              </tr>
2357
   *                              <tr valign="top">
2358
   *                              <td><b>ENT_HTML5</b></td>
2359 1
   *                              <td>
2360
   *                              Handle code as HTML 5.
2361 1
   *                              </td>
2362
   *                              </tr>
2363
   *                              </table>
2364
   *                              </p>
2365
   * @param string $encoding      [optional] <p>
2366
   *                              Like <b>htmlspecialchars</b>,
2367
   *                              <b>htmlentities</b> takes an optional third argument
2368
   *                              <i>encoding</i> which defines encoding used in
2369
   *                              conversion.
2370
   *                              Although this argument is technically optional, you are highly
2371
   *                              encouraged to specify the correct value for your code.
2372
   *                              </p>
2373
   * @param bool   $double_encode [optional] <p>
2374
   *                              When <i>double_encode</i> is turned off PHP will not
2375
   *                              encode existing html entities. The default is to convert everything.
2376
   *                              </p>
2377
   *
2378
   *
2379
   * @return string the encoded string.
2380
   * </p>
2381
   * <p>
2382
   * If the input <i>string</i> contains an invalid code unit
2383
   * sequence within the given <i>encoding</i> an empty string
2384
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2385
   * <b>ENT_SUBSTITUTE</b> flags are set.
2386
   */
2387 1
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Characters that are still stored with three or more bytes after the native call
    // are replaced by the output of UTF8::html_encode(); each distinct char is encoded once.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($str, $position);
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
2415
2416 16
  /**
2417
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2418 16
   *
2419
   * INFO: Take a look at "UTF8::htmlentities()"
2420
   *
2421
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2422
   *
2423
   * @param string $str           <p>
2424
   *                              The string being converted.
2425
   *                              </p>
2426
   * @param int    $flags         [optional] <p>
2427
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2428
   *                              invalid code unit sequences and the used document type. The default is
2429
   *                              ENT_COMPAT | ENT_HTML401.
2430
   *                              <table>
2431 28
   *                              Available <i>flags</i> constants
2432
   *                              <tr valign="top">
2433 28
   *                              <td>Constant Name</td>
2434
   *                              <td>Description</td>
2435 28
   *                              </tr>
2436 5
   *                              <tr valign="top">
2437
   *                              <td><b>ENT_COMPAT</b></td>
2438
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2439 28
   *                              </tr>
2440
   *                              <tr valign="top">
2441
   *                              <td><b>ENT_QUOTES</b></td>
2442
   *                              <td>Will convert both double and single quotes.</td>
2443
   *                              </tr>
2444
   *                              <tr valign="top">
2445
   *                              <td><b>ENT_NOQUOTES</b></td>
2446
   *                              <td>Will leave both double and single quotes unconverted.</td>
2447
   *                              </tr>
2448
   *                              <tr valign="top">
2449 1
   *                              <td><b>ENT_IGNORE</b></td>
2450
   *                              <td>
2451 1
   *                              Silently discard invalid code unit sequences instead of returning
2452
   *                              an empty string. Using this flag is discouraged as it
2453 1
   *                              may have security implications.
2454 1
   *                              </td>
2455
   *                              </tr>
2456
   *                              <tr valign="top">
2457 1
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2458 1
   *                              <td>
2459
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2460 1
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2461
   *                              </td>
2462
   *                              </tr>
2463
   *                              <tr valign="top">
2464
   *                              <td><b>ENT_DISALLOWED</b></td>
2465
   *                              <td>
2466
   *                              Replace invalid code points for the given document type with a
2467
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2468
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2469
   *                              instance, to ensure the well-formedness of XML documents with
2470
   *                              embedded external content.
2471 16
   *                              </td>
2472
   *                              </tr>
2473
   *                              <tr valign="top">
2474 16
   *                              <td><b>ENT_HTML401</b></td>
2475
   *                              <td>
2476
   *                              Handle code as HTML 4.01.
2477 16
   *                              </td>
2478
   *                              </tr>
2479 16
   *                              <tr valign="top">
2480 16
   *                              <td><b>ENT_XML1</b></td>
2481 15
   *                              <td>
2482 16
   *                              Handle code as XML 1.
2483 6
   *                              </td>
2484
   *                              </tr>
2485 15
   *                              <tr valign="top">
2486
   *                              <td><b>ENT_XHTML</b></td>
2487
   *                              <td>
2488
   *                              Handle code as XHTML.
2489
   *                              </td>
2490
   *                              </tr>
2491
   *                              <tr valign="top">
2492
   *                              <td><b>ENT_HTML5</b></td>
2493
   *                              <td>
2494
   *                              Handle code as HTML 5.
2495
   *                              </td>
2496
   *                              </tr>
2497
   *                              </table>
2498
   *                              </p>
2499
   * @param string $encoding      [optional] <p>
2500
   *                              Defines encoding used in conversion.
2501
   *                              </p>
2502
   *                              <p>
2503
   *                              For the purposes of this function, the encodings
2504
   *                              ISO-8859-1, ISO-8859-15,
2505
   *                              UTF-8, cp866,
2506
   *                              cp1251, cp1252, and
2507
   *                              KOI8-R are effectively equivalent, provided the
2508
   *                              <i>string</i> itself is valid for the encoding, as
2509
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2510
   *                              the same positions in all of these encodings.
2511
   *                              </p>
2512
   * @param bool   $double_encode [optional] <p>
2513
   *                              When <i>double_encode</i> is turned off PHP will not
2514
   *                              encode existing html entities, the default is to convert everything.
2515
   *                              </p>
2516
   *
2517
   * @return string The converted string.
2518
   * </p>
2519
   * <p>
2520
   * If the input <i>string</i> contains an invalid code unit
2521
   * sequence within the given <i>encoding</i> an empty string
2522
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2523
   * <b>ENT_SUBSTITUTE</b> flags are set.
2524
   */
2525
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The literal 'UTF-8' is used as-is; every other value goes through
    // "UTF8::normalize_encoding()" with 'UTF-8' as the fallback argument.
    $targetEncoding = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $targetEncoding, $double_encode);
  }
2533
2534
  /**
   * Checks whether iconv is available on the server.
   *
   * Side effect: when iconv is loaded and "Bootup::is_php('5.6')" reports false, the iconv
   * input-, output- and internal-encoding are set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // "iconv_set_encoding" only exists when the extension is loaded, so it must not be
    // called otherwise (that would be a fatal "undefined function" error).
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2554
2555
  /**
   * Alias for "UTF8::decimal_to_chr()".
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Forwarded unchanged to "UTF8::decimal_to_chr()".</p>
   *
   * @return string <p>The result of "UTF8::decimal_to_chr()".</p>
   */
  public static function int_to_chr($int)
  {
    return self::decimal_to_chr($int);
  }
2568
2569
  /**
   * Converts an integer to its hexadecimal "U+xxxx" code point notation.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to convert; anything that is not a real integer is rejected.</p>
   * @param string $pfix [optional] <p>Prefix that is put in front of the hex digits.</p>
   *
   * @return string <p>Prefix + at least four lowercase hex digits, or an empty string on failure.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only genuine integers survive the cast unchanged.
    if ((int)$int !== $int) {
      return '';
    }

    // Short values are left-padded with zeros up to four digits; longer ones stay as they are.
    return $pfix . str_pad(dechex($int), 4, '0', STR_PAD_LEFT);
  }
2591
2592
  /**
   * Checks whether intl-char is available on the server.
   *
   * Requires both a positive "Bootup::is_php('7.0')" check and an existing "IntlChar" class.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2605 4
2606 4
  /**
   * Checks whether the "intl" extension is available on the server.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean.
    return extension_loaded('intl');
  }
2615 4
2616 4
  /**
   * Deprecated camelCase alias for "UTF8::is_ascii()".
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str <p>Forwarded unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_ascii()".</p>
   *
   * @deprecated <p>Call "UTF8::is_ascii()" directly.</p>
   */
  public static function isAscii($str)
  {
    return self::is_ascii($str);
  }
2631 2
2632
  /**
   * Deprecated camelCase alias for "UTF8::is_base64()".
   *
   * @see UTF8::is_base64()
   *
   * @param string $str <p>Forwarded unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_base64()".</p>
   *
   * @deprecated <p>Call "UTF8::is_base64()" directly.</p>
   */
  public static function isBase64($str)
  {
    return self::is_base64($str);
  }
2647
2648
  /**
   * Deprecated camelCase alias for "UTF8::is_binary()".
   *
   * @see UTF8::is_binary()
   *
   * @param string $str <p>Forwarded unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_binary()".</p>
   *
   * @deprecated <p>Call "UTF8::is_binary()" directly.</p>
   */
  public static function isBinary($str)
  {
    return self::is_binary($str);
  }
2663 3
2664 3
  /**
   * Deprecated camelCase alias for "UTF8::is_bom()".
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr <p>Forwarded unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_bom()".</p>
   *
   * @deprecated <p>Call "UTF8::is_bom()" directly.</p>
   */
  public static function isBom($utf8_chr)
  {
    return self::is_bom($utf8_chr);
  }
2679 3
2680 3
  /**
   * Deprecated camelCase alias for "UTF8::is_html()".
   *
   * @see UTF8::is_html()
   *
   * @param string $str <p>Forwarded unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_html()".</p>
   *
   * @deprecated <p>Call "UTF8::is_html()" directly.</p>
   */
  public static function isHtml($str)
  {
    return self::is_html($str);
  }
2695
2696
  /**
   * Deprecated camelCase alias for "UTF8::is_json()".
   *
   * @see UTF8::is_json()
   *
   * @param string $str <p>Forwarded unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_json()".</p>
   *
   * @deprecated <p>Call "UTF8::is_json()" directly.</p>
   */
  public static function isJson($str)
  {
    return self::is_json($str);
  }
2711
2712 43
  /**
   * Deprecated camelCase alias for "UTF8::is_utf16()".
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str <p>Forwarded unchanged.</p>
   *
   * @return int|false <p>The result of "UTF8::is_utf16()":
   *                   false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.</p>
   *
   * @deprecated <p>Call "UTF8::is_utf16()" directly.</p>
   */
  public static function isUtf16($str)
  {
    return self::is_utf16($str);
  }
2727
2728
  /**
   * Deprecated camelCase alias for "UTF8::is_utf32()".
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str <p>Forwarded unchanged.</p>
   *
   * @return int|false <p>The result of "UTF8::is_utf32()":
   *                   false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.</p>
   *
   * @deprecated <p>Call "UTF8::is_utf32()" directly.</p>
   */
  public static function isUtf32($str)
  {
    return self::is_utf32($str);
  }
2743 41
2744 41
  /**
   * Deprecated camelCase alias for "UTF8::is_utf8()".
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    <p>Forwarded unchanged.</p>
   * @param bool   $strict [optional] <p>Forwarded unchanged.</p>
   *
   * @return bool <p>The result of "UTF8::is_utf8()".</p>
   *
   * @deprecated <p>Call "UTF8::is_utf8()" directly.</p>
   */
  public static function isUtf8($str, $strict = false)
  {
    return self::is_utf8($str, $strict);
  }
2760 34
2761 39
  /**
   * Tells whether a string consists of 7-bit ASCII bytes only.
   *
   * @param string $str <p>The string to check (it is cast to string first).</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if no byte in the range 0x80-0xFF occurs (also for an empty string)<br />
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return true;
    }

    // Anything but a positive match (no match, or a PCRE failure) counts as ASCII.
    return preg_match('/[\x80-\xFF]/', $str) !== 1;
  }
2781
2782 3
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input must decode in strict mode and re-encode to exactly the same string.
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded; an empty string is never base64.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = base64_decode($str, true);

    // Compare against false explicitly: a truthiness test would wrongly reject
    // valid input whose decoded payload is the string "0" (e.g. "MA==").
    return $decoded !== false && base64_encode($decoded) === $str;
  }
2804 33
2805 33
  /**
   * Check if the input looks binary (heuristic).
   *
   * The input is treated as binary when it is either a bit-string made of "0"/"1" characters
   * only, or when it contains at least one NUL byte.
   *
   * @param mixed $input <p>The value to check (it is cast to string first).</p>
   *
   * @return bool <p><strong>false</strong> for an empty string or when neither rule applies.</p>
   */
  public static function is_binary($input)
  {
    $input = (string)$input;

    if ($input === '') {
      return false;
    }

    if (preg_match('~^[01]+$~', $input) === 1) {
      return true;
    }

    // A former "more than 30% NUL bytes" rule was removed: any input that satisfied it
    // necessarily contains a NUL byte, so this single check covers both cases.
    return strpos($input, "\x00") !== false;
  }
2835
2836
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and passes them to "UTF8::is_binary()".
   * When the file cannot be opened or read, an empty string is checked instead.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return bool
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() reports failure by returning false; never hand that to fread()/fclose().
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Reached only when an error handler turns warnings into exceptions.
      $block = '';
    }

    // Release the handle on every path, including after an exception.
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2855
2856
  /**
   * Checks if the given string is identical to one of the known "Byte Order Mark" strings.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // The BOM strings are the keys of self::$BOM; the comparison is strict.
    return in_array($str, array_keys(self::$BOM), true);
  }
2875
2876
  /**
   * Check if the string contains at least one html-tag, e.g. "<lall>".
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return bool <p><strong>false</strong> for an empty string or when no tag is found.</p>
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Opening or closing tag, optionally with attributes and a self-closing slash.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    return preg_match($tagPattern, $str) === 1;
  }
2902
2903
  /**
   * Try to check if "$str" is a json-string.
   *
   * Only input that "UTF8::json_decode()" turns into an array or an object, without a
   * JSON error being recorded, is accepted.
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return bool <p><strong>false</strong> for an empty string or when the check fails.</p>
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);
    $isContainer = is_object($decoded) === true || is_array($decoded) === true;

    return $isContainer && json_last_error() === JSON_ERROR_NONE;
  }
2934 2
2935
  /**
   * Check if the string is UTF-16.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br />
   *                   <strong>1</strong> for UTF-16LE,<br />
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16($str)
  {
    return self::detect_utf_byte_order($str, 'UTF-16LE', 'UTF-16BE');
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    return self::detect_utf_byte_order($str, 'UTF-32LE', 'UTF-32BE');
  }

  /**
   * Shared implementation of "UTF8::is_utf16()" and "UTF8::is_utf32()".
   *
   * The BOM is removed first; input that "UTF8::is_binary()" does not accept is rejected.
   * Otherwise both byte orders are scored and the higher score wins; a tie means "unknown".
   *
   * @param string $str        <p>The input string.</p>
   * @param string $encodingLE <p>Little-endian encoding name, e.g. "UTF-16LE".</p>
   * @param string $encodingBE <p>Big-endian encoding name, e.g. "UTF-16BE".</p>
   *
   * @return int|false <p><strong>1</strong> for little-endian, <strong>2</strong> for big-endian,
   *                   <strong>false</strong> if undecided.</p>
   */
  private static function detect_utf_byte_order($str, $encodingLE, $encodingBE)
  {
    $str = self::remove_bom($str);

    // A false result from remove_bom() ends here as well: is_binary() casts it to ''.
    if (self::is_binary($str) !== true) {
      return false;
    }

    $scoreLE = self::utf_round_trip_score($str, $encodingLE);
    $scoreBE = self::utf_round_trip_score($str, $encodingBE);

    if ($scoreLE === $scoreBE) {
      return false;
    }

    return $scoreLE > $scoreBE ? 1 : 2;
  }

  /**
   * Scores how plausible it is that "$str" is encoded in "$encoding".
   *
   * The string is converted to UTF-8, back to "$encoding" and to UTF-8 again. Only when both
   * UTF-8 results are identical, the keys of "UTF8::count_chars()" for the converted text are
   * looked up (strict in_array) in the "UTF8::count_chars()" result for the raw input.
   *
   * NOTE(review): in_array() searches the *values* of the raw-input result for the *keys* of
   * the converted result - confirm against "UTF8::count_chars()" that this is intended.
   *
   * @param string $str      <p>The BOM-free input string.</p>
   * @param string $encoding <p>The candidate encoding.</p>
   *
   * @return int <p>Number of hits; 0 if the conversion is empty or does not round-trip.</p>
   */
  private static function utf_round_trip_score($str, $encoding)
  {
    $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if (!$asUtf8) {
      return 0;
    }

    $reEncoded = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
    $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
    if ($roundTrip !== $asUtf8) {
      return 0;
    }

    $score = 0;
    $strChars = self::count_chars($str, true);
    foreach (self::count_chars($roundTrip, true) as $char => $unused) {
      if (in_array($char, $strChars, true) === true) {
        $score++;
      }
    }

    return $score;
  }
3054
3055
  /**
   * Checks whether the passed string contains only byte sequences that form valid UTF-8 characters.
   *
   * The string is walked byte by byte with a small state machine: a lead byte announces the
   * length of its sequence, every following byte must be a continuation byte, and the decoded
   * code point is rejected when it is an overlong form, a surrogate, above U+10FFFF or encoded
   * in more than four bytes. A sequence that is cut off at the end of the string is rejected too.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool <p><strong>true</strong> for an empty string or valid UTF-8, <strong>false</strong> otherwise.</p>
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE: the former shortcut ran a "/u" regex exactly when PCRE had *no* UTF-8 support,
    // which can only fail. The state machine below does not depend on PCRE at all, so it
    // is now used unconditionally.

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // count bytes (not characters), even if mbstring overloads strlen()
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Current octet is neither in the US-ASCII range nor a legal first
          // octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // When mState is non-zero, we expect a continuation of the multi-octet sequence.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation: merge its six payload bits into the code point.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the multi-octet sequence, mUcs4 now contains the final Unicode
        // code point: check for illegal sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // initialize UTF8 cache
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state means the string ended in the middle of a multi-octet sequence.
    return $mState === 0;
  }
3208
3209
  /**
   * Decodes a JSON string after passing it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>The JSON string being decoded; it is filtered and cast to string first.</p>
   * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.</p>
   * @param int    $depth   [optional] <p>User specified recursion depth.</p>
   * @param int    $options [optional] <p>Bitmask of JSON decode options; only forwarded when
   *                        Bootup::is_php('5.4') reports <b>TRUE</b>.</p>
   *
   * @return mixed <p>Whatever the native json_decode() returns for the filtered input
   *               (<b>NULL</b> if the JSON cannot be decoded or is nested too deeply).</p>
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $filtered = (string)self::filter($json);

    // Older runtimes get the three-argument call, without $options.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3257 9
3258 1
  /**
   * Returns the JSON representation of a value after passing it through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The value being encoded. Can be any type except a resource;
   *                       it is filtered before encoding.</p>
   * @param int   $options [optional] <p>Bitmask of JSON_* encode constants
   *                       (e.g. <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_UNICODE</b>).</p>
   * @param int   $depth   [optional] <p>Maximum depth, must be greater than zero; only forwarded
   *                       when Bootup::is_php('5.5') reports <b>TRUE</b>.</p>
   *
   * @return string|false <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // Older runtimes get the two-argument call, without $depth.
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3306 41
3307
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function lcfirst($str)
  {
    // Static analysis reports that UTF8::substr() can return false; cast both parts so
    // that strtolower() and the concatenation always work on strings.
    $firstChar = (string)self::substr($str, 0, 1);
    $remainder = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $remainder;
  }
3318
3319 1
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>Optional characters to be stripped; when omitted (or empty), leading
   *                      separator (\pZ) and "other"/control (\pC) characters are removed.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP but is a perfectly valid character list, so it must not
    // fall into the "no characters given" default branch.
    $noCharsGiven = ($chars === INF || (!$chars && $chars !== '0'));

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($noCharsGiven) {
      $pattern = '/^[\pZ\pC]+/u';
    } else {
      $pattern = '/^' . self::rxClass($chars) . '+/u';
    }

    // preg_replace() yields null on a PCRE error; cast to keep the string return type.
    return (string)preg_replace($pattern, '', $str);
  }
3342
3343 1
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point, or an empty string
   *                if the input yields no code points.</p>
   */
  public static function max($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // max() must not be called on an empty list (warning on old PHP, exception on PHP 8).
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3358
3359
  /**
   * Determines the largest byte length among the entries that
   * UTF8::chr_size_list() reports for the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Largest reported size, or 0 when the size list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3376
3377 10
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3392
3393
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point, or an empty string
   *                if the input yields no code points.</p>
   */
  public static function min($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // min() must not be called on an empty list (warning on old PHP, exception on PHP 8).
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3408 1
3409
  /**
   * Deprecated camelCase alias that simply forwards to UTF8::normalize_encoding().
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3425
3426
  /**
   * Normalize the encoding-"name" input.
   *
   * "UTF-8" and names listed in self::$ICONV_ENCODING are returned untouched. Any other name is
   * upper-cased and, if its alphanumeric form matches a known alias, replaced by the canonical
   * name. Results are memoized per original input.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8; returned as-is when $encoding is empty.</p>
   *
   * @return string|mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc., or $fallback for an empty input.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = array();

    if (!$encoding) {
      return $fallback;
    }

    // already canonical: nothing to do
    if ('UTF-8' === $encoding || in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // alias (upper-case, punctuation stripped) => canonical encoding name
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $upperCased = strtoupper($encoding);
    $aliasKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

    // unknown names fall back to their upper-cased spelling
    $normalized = isset($aliases[$aliasKey]) ? $aliases[$aliasKey] : $upperCased;

    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
  }
3483
3484
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$UTF8_MSWORD found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists, built once from the lookup table
    static $SEARCH = null;
    static $REPLACE = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    if ($SEARCH === null) {
      $SEARCH = array_keys(self::$UTF8_MSWORD);
      $REPLACE = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($SEARCH, $REPLACE, $input);
  }
3510
3511
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE found in the string is replaced by a plain space;
   * unless requested otherwise, the entries of self::$BIDI_UNI_CODE_CONTROLS_TABLE are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // Only a strict `true` keeps the NBSP, so the cache key has to be derived from the very
    // same test. Keying on (int)$keepNonBreakingSpace let a call with e.g. 1 store the
    // unmodified table in the slot that later calls with `true` would read.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $table = self::$WHITESPACE_TABLE;

      if ($keepNbsp) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3557
3558
  /**
   * Remove every run of whitespace from the string.
   *
   * Whitespace is whatever the POSIX class [[:space:]] matches in PCRE's "/u" mode: tabs and
   * newlines, and presumably also multibyte whitespace such as the thin space and the
   * ideographic space (depends on PCRE's Unicode support).
   *
   * @param string $str
   *
   * @return string
   */
  public static function strip_whitespace($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    $stripped = preg_replace('/[[:space:]]+/u', '', $input);

    return (string)$stripped;
  }
3578
3579
  /**
   * Format a number with grouped thousands.
   *
   * Separators longer than one byte are handled by formatting with the ASCII defaults first and
   * then swapping both separators in a single pass, so this also works where the native
   * number_format() only honours the first byte of a separator.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // One multi-byte separator is enough to need the workaround.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() replaces both placeholders simultaneously; a sequential str_replace()
      // would re-replace any "," that the decimal point itself introduced.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3617
3618
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * A non-UTF-8 $encoding is normalized first and the input converted to UTF-8. If "\IntlChar"
   * is supported and yields a truthy result, that result is returned; otherwise the code point
   * is computed from up to four leading bytes and memoized in a static cache.
   *
   * NOTE(review): the manual path does not validate continuation bytes, so malformed input does
   * not necessarily produce 0 - confirm before relying on that.
   *
   * @param string      $chr      <p>The character of which to calculate code point.<p/>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>Unicode code point of the given character, 0 when no byte could be read.</p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlCodePoint = \IntlChar::ord($chr);
      if ($intlCodePoint) {
        return $intlCodePoint;
      }
    }

    // use static cache, if there is no support for "\IntlChar"
    static $CHAR_CACHE = array();
    if (isset($CHAR_CACHE[$chr]) === true) {
      return $CHAR_CACHE[$chr];
    }

    // 1-based list of the (at most four) leading byte values
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = unpack('C*', self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four-byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three-byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two-byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing readable)
      $codePoint = $lead;
    }

    $CHAR_CACHE[$chr] = $codePoint;

    return $codePoint;
  }
3680 1
3681 2
  /**
   * Parses the string into an array (written to the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never creates variables in the
   *          current scope; the result is only available through $result.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Run the string through UTF8::clean() before parsing.</p>
   *
   * @return bool <p><strong>false</strong> if parsing failed or produced an empty $result,
   *              <strong>true</strong> otherwise.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($input, $result);

    return $parsed !== false && !empty($result);
  }
3709
3710 6
  /**
   * Checks if the "/u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * The probe matches an empty "/u" pattern against an empty string with errors silenced.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3720 6
3721 6
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point in this order: decimal digits are taken as a
   * number, hexadecimal digits are converted via UTF8::hex_to_int(), anything else is treated
   * as a character and passed to UTF8::ord(). An empty boundary, or one that resolves to 0,
   * yields an empty array.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start first, then end; bail out as soon as one of them is unusable
    $codePoints = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $codePoints[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($codePoints[0], $codePoints[1]));
  }
3767
3768
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are turned into hexadecimal HTML entities so that the
    // entity decoding in the loop below can resolve them.
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $str)) {
      $str = preg_replace($unicodeEscapePattern, '&#x\\1;', rawurldecode($str));
    }

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4.
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // One pass = to_utf8 -> html_entity_decode -> rawurldecode -> fix_simple_utf8.
    // With $multi_decode the pass is repeated until the string stops changing.
    do {
      $beforePass = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $beforePass !== $str);

    return (string)$str;
  }
3818
3819
  /**
   * Alias of "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever UTF8::remove_bom() returns for the given input.</p>
   *
   * @deprecated
   */
  public static function removeBOM($str)
  {
    // pure delegation, no logic of its own
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3834
3835 1
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM (BOM byte string => BOM length in bytes) is tested against the
   * start of the string; the loop does not stop after the first hit, so a string that
   * still starts with another known BOM after stripping has that one removed as well.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // byte-wise ('8BIT') comparison: a BOM only counts at offset 0
      if (0 === self::strpos($str, $bomString, 0, '8BIT')) {
        $str = self::substr($str, $bomByteLength, null, '8BIT');

        // substr() can signal "nothing left" with false (input was only the BOM);
        // never hand that back to strpos() or to the caller, the contract is "string".
        if ($str === false || $str === '') {
          return '';
        }
      }
    }

    return $str;
  }
3858
3859 2
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Consecutive repetitions of each search string are collapsed into a single occurrence,
   * e.g. remove_duplicates('a  b   c') === 'a b c'.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    // anything that is neither a string nor an array leaves $str untouched
    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // Replace the run with the captured single occurrence ("$1") instead of using $item
        // itself as replacement: preg_replace() would expand "$0", "$2", "\0" ... inside
        // $item as backreferences and corrupt the result.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3882 2
3883 2
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Adapted from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the url-encoded form ("%00" ... "%1F") of these characters.</p>
   * @param string $replacement <p>The string every removed sequence is replaced with.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Percent-encoding hex digits are case-insensitive ("%0B" === "%0b"), so match both forms.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing is replaced any more: removing one sequence can
    // glue the surrounding characters together into a new one ("%0%000" -> "%00").
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3916
3917
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Also run the string through a UTF-8 to UTF-8 conversion so that
   *                                   invalid byte sequences are substituted.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // an empty replacement is expressed as the "none" substitution mode of mbstring
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::replace_diamond_question_mark() without mbstring cannot handle all chars correctly', E_USER_WARNING);
      }

      // swap the substitute character only for the duration of the conversion,
      // then put the previous setting back
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      /** @noinspection CallableParameterUseCaseInTypeContextInspection */
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    // finally replace the replacement character U+FFFD itself (escaped and literal notation)
    $diamonds = array("\xEF\xBF\xBD", '�');
    $replacements = array($replacementChar, $replacementChar);

    return str_replace($diamonds, $replacements, $str);
  }
3967
3968
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without a character list, everything matching the Unicode categories "Z" (separators)
   * and "C" (control / other) is stripped from the end of the string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>Optional characters to be stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Only a missing / empty character list selects the default behaviour. A plain truthiness
    // check would also swallow "0", which is a perfectly valid character to strip.
    $useDefault = $chars === INF
                  || $chars === null
                  || $chars === false
                  || $chars === ''
                  || $chars === array();

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefault) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
3991 1
3992
  /**
   * Build a regex fragment (without delimiters) that matches any one of the characters in $s.
   *
   * The result is either a bracket expression ("[...]") or, when $s contains pieces that
   * cannot go into a bracket expression, a non-capturing alternation ("(?:[...]|...)").
   * Results are memoized per "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the bracket expression.</p>
   *
   * @return string
   */
  private static function rxClass($s, $class = '')
  {
    static $cache = array();

    $cacheKey = $s . $class;
    if (isset($cache[$cacheKey])) {
      return $cache[$cacheKey];
    }

    // index 0 collects the content of the bracket expression,
    // every further entry is a stand-alone alternative
    $alternatives = array($class);

    foreach (self::str_split($s) as $piece) {
      if ('-' === $piece) {
        // a hyphen goes to the very front of the bracket expression
        $alternatives[0] = '-' . $alternatives[0];
        continue;
      }

      if (!isset($piece[2])) {
        // at most two bytes long: quote it, it may be a regex meta character
        $alternatives[0] .= preg_quote($piece, '/');
        continue;
      }

      if (1 === self::strlen($piece)) {
        // three or more bytes, but still one character
        $alternatives[0] .= $piece;
        continue;
      }

      // more than one character: cannot be part of a bracket expression
      $alternatives[] = $piece;
    }

    if ($alternatives[0]) {
      $alternatives[0] = '[' . $alternatives[0] . ']';
    }

    $return = (1 === count($alternatives))
        ? $alternatives[0]
        : '(?:' . implode('|', $alternatives) . ')';

    $cache[$cacheKey] = $return;

    return $return;
  }
4040
4041
  /**
   * WARNING: Echo native UTF8-Support libs, e.g. for debugging.
   *
   * Writes every value of the internal support table to the output, each followed by "\n<br>".
   */
  public static function showSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // collect first, emit once
    $output = '';
    foreach (self::$SUPPORT as $supportValue) {
      $output .= $supportValue . "\n<br>";
    }

    echo $output;
  }
4054
4055
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII input is handed back untouched when the caller asked for it
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // only non-default encoding names need to be normalized
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4087
4088 1
  /**
   * Convert a string to an array of Unicode characters.
   *
   * Uses PCRE in UTF-8 mode when available, otherwise a byte-wise UTF-8 scanner that
   * silently skips bytes which do not form a valid 1-4 byte sequence.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    $ret = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Clean up-front so that the flag is honoured by both code paths
    // (the byte-wise fallback used to ignore it).
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "." with the "s" modifier also matches newlines, "u" makes it match whole characters
      preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: walk the bytes and group them by the UTF-8 lead byte pattern

      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        // strlen() may be overloaded to count characters, so ask for the byte length explicitly
        $len = \mb_strlen($str, '8BIT');
      } else {
        $len = strlen($str);
      }

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // 0xxxxxxx: single byte (ASCII)
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 110xxxxx: lead byte of a two byte sequence, needs one continuation byte (10xxxxxx)
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 1110xxxx: lead byte of a three byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 11110xxx: lead byte of a four byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
        // any other byte is not part of a valid sequence and is dropped
      }
    }

    if ($length > 1) {
      // group the single characters into chunks of $length characters and glue each chunk together
      $ret = array_chunk($ret, $length);

      return array_map(
          function ($item) {
            return implode('', $item);
          }, $ret
      );
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4212
4213
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run from cheap / specific to expensive / generic and the first hit wins:
   * UTF-16 / UTF-32 (binary input only), ASCII, UTF-8, "\mb_detect_encoding()", "iconv()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {
      // each detector is evaluated only once; its result distinguishes LE (1) from BE (2)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted when converting the string from that encoding to
    // itself leaves it unchanged (compared via md5).

    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      // INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4305
4306
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty or the haystack does not end with the needle.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many characters from the end of the haystack as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
4329
4330 7
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty or the haystack does not end with the needle.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many characters from the end of the haystack as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4353
4354
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The string(s) to search for. Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string(s); always inserted literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // turn every search string into a case-insensitive UTF-8 pattern
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // an empty needle must never match: "start of string preceded by a character" is impossible
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // str_ireplace() inserts the replacement literally, preg_replace() would expand
    // "$1", "\\1", "${1}" ... as backreferences, so neutralize "\" and "$" first.
    $escapeMap = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = strtr((string)$value, $escapeMap);
      }
    } else {
      $replacement = strtr((string)$replace, $escapeMap);
    }

    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
4397 61
4398 2
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is empty or the haystack does not start with the needle.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // "starts with" == the first case-insensitive occurrence is at position 0
    return self::stripos($haystack, $needle) === 0;
  }
4421
4422
  /**
   * Limit the number of characters in a string without cutting a word in half.
   *
   * A string longer than $length is cut to $length characters, the (possibly incomplete)
   * last word is dropped and $strAddOn is appended. If the cut part contains no space at
   * all, it is cut hard at $length - 1 characters instead.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters before the add-on.</p>
   * @param string $strAddOn <p>The string appended to a shortened result.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls exactly behind a word: cut off the space and we are done
    if (self::substr($str, $length - 1, 1) === ' ') {
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $str = self::substr($str, 0, $length);

    // drop everything after the last space
    $words = explode(' ', $str);
    array_pop($words);
    $new_str = implode(' ', $words);

    if ($new_str === '') {
      // no space found: hard cut
      return self::substr($str, 0, $length - 1) . $strAddOn;
    }

    return $new_str . $strAddOn;
  }
4462
4463
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if $pad_length is not an integer, is not positive,
   * is smaller than the length of the input, or if $pad_string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) === true
        &&
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string);

      // nothing to pad with (would otherwise be a division by zero below)
      if (!$ps_length) {
        return $str;
      }

      // number of characters that have to be added
      $diff = $pad_length - $str_length;

      // In every case the pad string is repeated often enough to cover the
      // required width and then cut down to the exact number of characters.
      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          $repeat = (int)ceil($diff / $ps_length / 2);

          // left side gets the rounded-down half, right side the rounded-up half;
          // the cast has to wrap the whole division to yield an integer length
          $pre = str_repeat($pad_string, $repeat);
          $pre = self::substr($pre, 0, (int)($diff / 2));
          $post = str_repeat($pad_string, $repeat);
          $post = self::substr($post, 0, (int)ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
4517
4518
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>The string to be repeated.</p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated.
   *                           Has to be greater than or equal to 0; with 0 an empty string is returned.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4542
4543
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>The needle; an array may be used to designate multiple needles.</p>
   * @param mixed $replace <p>The replacement value; an array may be used to designate multiple replacements.</p>
   * @param mixed $subject <p>
   *                       The haystack (string or array). If subject is an array, the replacement is
   *                       performed on every entry and an array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>A string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4576
4577
  /**
   * Replace only the first occurrence of "$search" in "$subject" with "$replace".
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement for the first match.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject, unchanged if the search term was not found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $position = self::strpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
4596
4597
  /**
   * Shuffle all the characters of a string.
   *
   * The string is split via UTF8::split(), so multi-byte characters are kept intact.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4612
4613
  /**
   * Sort all characters of a string by their code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicate values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4638
4639
  /**
   * Split a string into an array of chunks, each "$len" grapheme clusters long.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Chunk length; values below 1 are handed to the native "str_split()".</p>
   *
   * @return array <p>The chunks, or an empty array for an empty input string.</p>
   */
  public static function str_split($str, $len = 1)
  {
    // init
    $len = (int)$len;
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len === 0) {
        // every $len-th grapheme opens a new chunk
        $chunks[++$chunkIndex] = $grapheme;
      } else {
        $chunks[$chunkIndex] .= $grapheme;
      }
    }

    return $chunks;
  }
4683 1
4684
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> if either argument is an empty string.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    return self::strpos($haystack, $needle) === 0;
  }
4707 1
4708
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in base 2,
   * without leading zeros ("0" for an empty or all-zero input).
   *
   * The conversion is done nibble by nibble because "base_convert()" works on floats
   * internally and loses precision for longer inputs.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string of "0" and "1" characters.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $hex = bin2hex($str);
    $bits = '';

    if ($hex !== '') {
      foreach (str_split($hex) as $nibble) {
        // each hex digit maps to exactly four bits
        $bits .= sprintf('%04b', hexdec($nibble));
      }
    }

    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4723
4724
  /**
   * Convert a string into an array of words.
   *
   * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result alternates between
   * non-word parts (even indexes) and words (odd indexes).
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charlist <p>Additional characters that are treated as part of a word.</p>
   *
   * @return array <p>The parts; <strong>array('')</strong> for an empty input string.</p>
   */
  public static function str_to_words($str, $charlist = '')
  {
    $str = (string)$str;

    if ($str === '') {
      return array('');
    }

    $charlist = self::rxClass($charlist, '\pL');
    $pattern = "/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u";

    return \preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);
  }
4744 10
4745 10
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Passed through to UTF8::to_ascii().</p>
   * @param bool   $strict  <p>Passed through to UTF8::to_ascii().</p>
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4760
4761
  /**
   * Count the words of a UTF-8 string, or return them.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words themselves (see $format).</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // parts alternate: non-word, word, non-word, word, ...
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    if ($format !== 1 && $format !== 2) {
      return ($partCount - 1) / 2;
    }

    $words = array();

    if ($format === 1) {
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // $format === 2: key every word by its character offset
    $offset = self::strlen($parts[0]);
    for ($i = 1; $i < $partCount; $i += 2) {
      $words[$offset] = $parts[$i];
      $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
    }

    return $words;
  }
4804
4805
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are case-folded first.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4823 10
4824 10
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4841
4842
  /**
   * Case-sensitive string comparison.
   *
   * Identical strings compare as equal right away; otherwise both strings are
   * normalized to NFD before the native "strcmp()" is applied.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
4862 2
4863 2
  /**
   * Find the length of the initial segment that does not contain any character of the mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The mask of characters to stop at.</p>
   * @param int    $offset   <p>Start position within $str.</p>
   * @param int    $length   <p>Length of the part of $str that is examined.</p>
   *
   * @return int|null <p><strong>null</strong> if the mask or the examined string is empty.</p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // only cut the string if a non-default window was requested
    if ($offset || $length !== 2147483647) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $match)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($match[1]);
    }

    return self::strlen($str);
  }
4895 11
4896
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4913
4914
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every entry is converted with UTF8::chr() and the results are concatenated.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $chars = array();

    foreach ($array as $codePoint) {
      $chars[] = self::chr($codePoint);
    }

    return implode('', $chars);
  }
4936 19
4937 2
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The string is compared against every key of self::$BOM.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    foreach (array_keys(self::$BOM) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4954
4955
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped.
   *                                HTML comments and PHP tags are always stripped; this is hardcoded
   *                                and can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $input = $cleanUtf8 ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
4989
4990
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $isUtf8 = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $isUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   &&
                   self::$SUPPORT['intl'] === true
                   &&
                   Bootup::is_php('5.4') === true;

    if ($useGrapheme) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5058
5059 47
  /**
   * Returns all of haystack starting from and including the first case-insensitive
   * occurrence of needle to the end.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');

    if ($isUtf8 === false && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_stristr()" can't handle other encodings
    if (
        $isUtf8 === true
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // last resort: capture everything in front of the first match with a regex
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/usi';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    return $before_needle ? $match[1] : self::substr($haystack, self::strlen($match[1]));
  }
5132
5133
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Run the support detection before the first read of self::$SUPPORT,
    // the single-byte branch below already needs "mbstring_func_overload".
    // NOTE(review): assumes checkForSupport() only fills self::$SUPPORT - confirm.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // single-byte encodings: the length is the byte count
    if ($encoding === 'ASCII' || $encoding === 'CP850') {
      if (
          $encoding === 'CP850'
          &&
          self::$SUPPORT['mbstring_func_overload'] === false
      ) {
        return strlen($str);
      }

      return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === true
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      return \iconv_strlen($str, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    // fallback via vanilla php
    preg_match_all('/./us', $str, $parts);
    $returnTmp = count($parts[0]);
    if ($returnTmp !== 0) {
      return $returnTmp;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5235 1
5236 1
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp(); both strings are case-folded first.
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5252 1
5253 1
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * Identical strings compare as equal right away; otherwise both strings go through
   * UTF8::strtonatfold() before the native "strnatcmp()" is applied.
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5271
5272
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are case-folded and then handed to UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5289 1
5290 1
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut to "$len" characters with UTF8::substr() and then compared
   * with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // UTF8::substr() may return false; cast so that UTF8::strcmp() always gets strings.
    $str1 = (string)self::substr($str1, 0, $len);
    $str2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($str1, $str2);
  }
5310 1
5311
  /**
5312 1
   * Search a string for any of a set of characters.
5313
   *
5314
   * @link  http://php.net/manual/en/function.strpbrk.php
5315
   *
5316
   * @param string $haystack  <p>The string where char_list is looked for.</p>
5317
   * @param string $char_list <p>This parameter is case sensitive.</p>
5318
   *
5319
   * @return string|false String starting from the character found, or false if it is not found.
5320
   */
5321
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // Nothing to search in, or nothing to search for.
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // Look for the first character that belongs to the character class
    // built from $char_list.
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $found)) {
      return false;
    }

    // Return the haystack from the first byte position of the matched character on.
    return substr($haystack, strpos($haystack, $found[0]));
  }
5336
5337
  /**
5338
   * Find position of first occurrence of string in a string.
5339
   *
5340 1
   * @link http://php.net/manual/en/function.mb-strpos.php
5341
   *
5342 1
   * @param string  $haystack  <p>The string being checked.</p>
5343
   * @param string  $needle    <p>The string to find in haystack.</p>
5344
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
5345
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5346
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5347
   *
5348
   * @return int|false <p>
5349
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
5350
   *                   If needle is not found it returns false.
5351
   *                   </p>
5352
   */
5353
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support an integer $needle, so a non-negative
    // integer is converted via self::chr(). This must run before the string
    // cast below: once $needle is a string the integer check can never match.
    if (is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // Warn only when neither extension is available for a non UTF-8 encoding.
    // (The previous condition used a bitwise "&" and tested "iconv === true",
    // so the warning fired exactly when iconv could handle the request.)
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php: search byte-wise in the part of the haystack
    // selected by $offset, then translate the byte position into a character
    // position relative to the original haystack
    $tail = (string)self::substr($haystack, $offset);

    $bytePos = strpos($tail, $needle);
    if ($bytePos === false) {
      return false;
    }

    if ($offset < 0) {
      // a negative offset counts from the end, so the number of skipped
      // characters is whatever was cut off in front of $tail
      $skipped = self::strlen($haystack) - self::strlen($tail);
    } else {
      $skipped = $offset;
    }

    // the sum is always an integer, so no further fallback is needed here
    return $skipped + self::strlen(substr($tail, 0, $bytePos));
  }
5469
5470 5
  /**
5471
   * Finds the last occurrence of a character in a string within another.
5472
   *
5473
   * @link http://php.net/manual/en/function.mb-strrchr.php
5474
   *
5475 5
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
5476
   * @param string $needle        <p>The string to find in haystack</p>
5477
   * @param bool   $before_needle [optional] <p>
5478
   *                              Determines which portion of haystack
5479
   *                              this function returns.
5480 5
   *                              If set to true, it returns all of haystack
5481 5
   *                              from the beginning to the last occurrence of needle.
5482 1
   *                              If set to false, it returns all of haystack
5483 1
   *                              from the last occurrence of needle to the end,
5484
   *                              </p>
5485 1
   * @param string $encoding      [optional] <p>
5486 1
   *                              Character encoding name to use.
5487 1
   *                              If it is omitted, internal character encoding is used.
5488
   *                              </p>
5489 1
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5490
   *
5491 5
   * @return string|false The portion of haystack or false if needle is not found.
5492 5
   */
5493 5 View Code Duplication
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // 'UTF-8' is used as given; every other value goes through self::normalize_encoding().
    $encoding = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    // "\mb_strpos" and "\iconv_strpos" returns wrong position,
    // if invalid characters are found in $haystack before $needle
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $result = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
5509 2
5510
  /**
5511 2
   * Reverses characters order in the string.
5512
   *
5513 1
   * @param string $str The input string
5514
   *
5515
   * @return string The string with characters in the reverse sequence
5516 1
   */
5517 1
  public static function strrev($str)
  {
    $str = (string)$str;

    // An empty input has nothing to reverse.
    if ($str === '') {
      return '';
    }

    // Split via self::split(), reverse the resulting list and glue it back together.
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5527
5528 2
  /**
5529
   * Finds the last occurrence of a character in a string within another, case insensitive.
5530
   *
5531
   * @link http://php.net/manual/en/function.mb-strrichr.php
5532
   *
5533
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
5534
   * @param string  $needle        <p>The string to find in haystack.</p>
5535
   * @param bool    $before_needle [optional] <p>
5536
   *                               Determines which portion of haystack
5537
   *                               this function returns.
5538
   *                               If set to true, it returns all of haystack
5539
   *                               from the beginning to the last occurrence of needle.
5540 1
   *                               If set to false, it returns all of haystack
5541
   *                               from the last occurrence of needle to the end,
5542 1
   *                               </p>
5543
   * @param string  $encoding      [optional] <p>
5544
   *                               Character encoding name to use.
5545
   *                               If it is omitted, internal character encoding is used.
5546
   *                               </p>
5547
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5548
   *
5549
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
5550
   */
5551 View Code Duplication
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // 'UTF-8' is used as given; every other value goes through self::normalize_encoding().
    $encoding = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    // "\mb_strpos" and "\iconv_strpos" returns wrong position,
    // if invalid characters are found in $haystack before $needle
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // case-insensitive counterpart of strrchr(), delegated to "\mb_strrichr"
    $result = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
5566
5567
  /**
5568 20
   * Find position of last occurrence of a case-insensitive string.
5569
   *
5570 20
   * @param string  $haystack  <p>The string to look in.</p>
5571 2
   * @param string  $needle    <p>The string to look for.</p>
5572
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5573
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5574 2
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5575 2
   *
5576
   * @return int|false <p>
5577 2
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
5578
   *                   not found, it returns false.
5579
   *                   </p>
5580 20
   */
5581
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted via self::chr()
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred implementation: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    } elseif (
        // INFO: the grapheme functions can't handle other encodings than UTF-8
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare upper-cased copies with the
    // case-sensitive self::strrpos()
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5647
5648 4
  /**
5649 16
   * Find position of last occurrence of a string in a string.
5650
   *
5651 19
   * @link http://php.net/manual/en/function.mb-strrpos.php
5652
   *
5653
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5654 19
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
5655 19
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5656
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5657 3
   *                              the end of the string.
5658 19
   *                              </p>
5659
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5660 19
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
5661
   *
5662
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
5663 19
   *                   is not found, it returns false.</p>
5664 19
   */
5665 19
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted via self::chr()
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred implementation: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    } elseif (
        // INFO: the grapheme functions can't handle other encodings than UTF-8
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // number of characters cut off in front of the searched part
    $skipped = 0;

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
      $skipped = $offset;
    } elseif ($offset < 0) {
      // a negative offset shortens the haystack at its end
      $haystack = self::substr($haystack, 0, $offset);
    }

    $bytePos = strrpos($haystack, $needle);

    // translate the byte position into a character position
    return $bytePos === false
        ? false
        : $skipped + self::strlen(substr($haystack, 0, $bytePos));
  }
5742
5743
  /**
5744 8
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5745
   * mask.
5746 8
   *
5747 2
   * @param string $str    <p>The input string.</p>
5748
   * @param string $mask   <p>The mask of chars</p>
5749
   * @param int    $offset [optional]
5750 7
   * @param int    $length [optional]
5751 7
   *
5752 7
   * @return int
5753
   */
5754 7
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // init
    $offset = (int)$offset;
    $length = (int)$length;

    // only cut the string when the caller actually restricted the range
    if ($offset !== 0 || $length !== 2147483647) {
      $str = self::substr($str, $offset, $length);
    }

    $str = (string)$str;

    if ($str === '' || !isset($mask[0])) {
      return 0;
    }

    // match the leading run of characters that belong to the mask
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $leading)) {
      return 0;
    }

    return self::strlen($leading[0]);
  }
5771 1
5772 1
  /**
5773 1
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5774 7
   *
5775 7
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5776 7
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5777
   * @param bool    $before_needle [optional] <p>
5778 7
   *                               If <b>TRUE</b>, strstr() returns the part of the
5779 7
   *                               haystack before the first occurrence of the needle (excluding the needle).
5780
   *                               </p>
5781 7
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
5782
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
5783
   *
5784
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
5785
   */
5786 View Code Duplication
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // "\mb_strpos" and "\iconv_strpos" returns wrong position,
    // if invalid characters are found in $haystack before $needle
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // 'UTF-8' is used as given; every other value goes through self::normalize_encoding().
    $encoding = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred implementation: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    } elseif (
        // INFO: the grapheme functions can't handle other encodings than UTF-8
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // regex fallback: lazily capture everything in front of the first occurrence of the needle
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/us';
    preg_match($pattern, $haystack, $parts);

    if (!isset($parts[1])) {
      return false;
    }

    $prefix = $parts[1];

    // either the part before the needle, or the rest of the haystack starting at the needle
    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
5844
5845
  /**
   * Fold the case of a string, so that it can be used for case-less matching.
   *
   * The entries of self::$COMMON_CASE_FOLD are replaced first, then (optionally) the table
   * loaded via self::getData('caseFolding_full') is applied, and finally the result is lowercased.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally replace the full case folding chars (default)<br />
   *                           <b>false</b>, use only the limited static array [self::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The case-folded string, or an empty string for empty input.</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // the lookup tables are built on first use and then kept for every later call
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $folded = str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 holds the search values, index 1 the matching replacements
      /** @noinspection OffsetOperationsInspection */
      $folded = str_replace($fullFoldTable[0], $fullFoldTable[1], $folded);
    }

    if ($cleanUtf8 === true) {
      $folded = self::clean($folded);
    }

    return self::strtolower($folded);
  }
5896
5897
  /**
   * Convert a string to lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string  $str       <p>The string being lowercased.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The lowercased string, or an empty string for empty input.</p>
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // optional clean-up pass before the string is handed to "\mb_strtolower"
      $input = self::clean($input);
    }

    // only non-default encoding names need to be normalized
    $charset = $encoding;
    if ($charset !== 'UTF-8') {
      $charset = self::normalize_encoding($charset, 'UTF-8');
    }

    return \mb_strtolower($input, $charset);
  }
5929
5930
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed
   * from the result.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The normalized string without non-spacing marks.</p>
   */
  private static function strtonatfold($str)
  {
    // step 1: canonical decomposition
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // step 2: strip the non-spacing marks
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5942
5943
  /**
   * Convert a string to uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string  $str       <p>The string being uppercased.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The uppercased string, or an empty string for empty input.</p>
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // optional clean-up pass before the string is handed to "\mb_strtoupper"
      $input = self::clean($input);
    }

    // only non-default encoding names need to be normalized
    $charset = $encoding;
    if ($charset !== 'UTF-8') {
      $charset = self::normalize_encoding($charset, 'UTF-8');
    }

    return \mb_strtoupper($input, $charset);
  }
5974
5975
  /**
   * Translate characters or replace sub-strings.
   *
   * Two call forms are supported:
   * - strtr($str, $from, $to): every element of $from is replaced by the element of $to at the
   *   same position; a string argument is split via self::str_split(), an array argument is
   *   used as-is. The longer list is truncated to the length of the shorter one.
   * - strtr($str, $pairs): $from is passed unchanged to the native strtr() as a
   *   "search => replacement" map, so it has to be an array in this form.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   [optional] <p>The string being translated to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // only strings need to be split; arrays are already lists of search / replace values
      $fromList = is_array($from) ? $from : self::str_split($from);
      $toList = is_array($to) ? $to : self::str_split($to);

      // surplus elements of the longer list have no partner and are ignored
      $pairCount = min(count($fromList), count($toList));

      if ($pairCount === 0) {
        // "array_combine()" returns false for empty arrays on PHP 5.3, so it is bypassed here
        $from = array();
      } else {
        $from = array_combine(
            array_slice($fromList, 0, $pairCount),
            array_slice($toList, 0, $pairCount)
        );
      }
    }

    return strtr($str, $from);
  }
6008
6009
  /**
   * Return the width of a string, as reported by "\mb_strwidth".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default encoding names need to be normalized
    $charset = $encoding;
    if ($charset !== 'UTF-8') {
      $charset = self::normalize_encoding($charset, 'UTF-8');
    }

    $input = $str;
    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      $input = self::clean($input);
    }

    // "mb_"-function, provided via polyfill if the extension is missing
    return \mb_strwidth($input, $charset);
  }
6033
6034
  /**
   * Get part of a string.
   *
   * The first available backend is used, in this order: native "substr" (CP850 only),
   * "\mb_substr", "\grapheme_substr", "\iconv_substr" and finally a pure PHP fallback
   * based on self::split().
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $start     <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>
   *                      Returns a sub-string specified by the start and length parameters,
   *                      an empty string for empty input, or <b>false</b> if $start is greater
   *                      than the length of the string.
   *                      </p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the character count is only needed to validate $start or to default $length
    $charCount = 0;
    if ($start || $length === null) {
      $charCount = (int)self::strlen($str, $encoding);
    }

    if ($start && $start > $charCount) {
      return false;
    }

    // from here on $length is always an integer
    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    $isDefaultEncoding = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $isDefaultEncoding ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return substr($str, $start, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    // INFO: "grapheme_substr()" is only used for UTF-8 here
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $start, $length);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $start, $length);
    }

    // fallback via vanilla php: split into characters, slice and join again
    $chars = self::split($str);

    return implode('', array_slice($chars, $start, $length));
  }
6137
6138
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * The part of $main_str selected by $offset and $length is extracted first; $str is then cut
   * to the same number of characters (counted via self::strlen()) before both parts are compared.
   *
   * @param string  $main_str           <p>The main string being compared.</p>
   * @param string  $str                <p>The secondary string being compared.</p>
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
   *                                    the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison. The default value is
   *                                    2147483647.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>The result of self::strcasecmp() or self::strcmp() for the two extracted parts.</p>
   */
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
  {
    // self::substr() returns false if the offset lies behind the end of the string,
    // the cast turns that into an empty string, so that only strings are passed on
    $mainPart = (string)self::substr($main_str, $offset, $length);
    $strPart = (string)self::substr($str, 0, self::strlen($mainPart));

    if ($case_insensitivity === true) {
      return self::strcasecmp($mainPart, $strPart);
    }

    return self::strcmp($mainPart, $strPart);
  }
6159
6160 1
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If it is omitted (null), the search runs from the offset
   *                           to the end of the haystack.
   *                           </p>
   * @param string  $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      // "no length given" has to stay null: casting it to 0 would cut the haystack
      // down to an empty string and every count with an offset would be 0
      if ($length !== null) {
        $length = (int)$length;
      }

      // a missing length counts as 0 for this check, as before
      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // self::substr() returns false if the offset lies behind the end of the haystack
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $needle = (string)self::clean($needle);
      $haystack = (string)self::clean($haystack);

      // cleaning can leave nothing to search for, and an empty needle
      // is rejected by "\mb_substr_count"
      if (!isset($needle[0])) {
        return false;
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: count the non-overlapping matches of the quoted needle
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6234 8
6235 8
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case-insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with
   *                the needle. An empty haystack always yields an empty string.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) === true) {
      // "self::substr()" may report a failure as false; cast so the documented string type always holds
      return (string)self::substr($haystack, self::strlen($needle));
    }

    return $haystack;
  }
6263
6264
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with
   *                the needle. An empty haystack always yields an empty string.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) === true) {
      // keep everything in front of the suffix
      $keepLength = self::strlen($haystack) - self::strlen($needle);

      // "self::substr()" may report a failure as false; cast so the documented string type always holds
      return (string)self::substr($haystack, 0, $keepLength);
    }

    return $haystack;
  }
6292
6293
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case-sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with
   *                the needle. An empty haystack always yields an empty string.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) === true) {
      // "self::substr()" may report a failure as false; cast so the documented string type always holds
      return (string)self::substr($haystack, self::strlen($needle));
    }

    return $haystack;
  }
6321
6322
  /**
   * Replace text within a portion of a string (character-wise, not byte-wise).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, the replacement is applied to every element and an array is returned;
   * scalar $replacement / $start / $length values are then reused for every element, array values
   * are matched up with the elements by position.
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $start            <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br /><br />
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given, then it will default to strlen(
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
   *                                          length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str) === true) {
      $num = count($str);

      // Nothing to replace. Without this guard "array_pad()" below would still produce one
      // replacement entry and the result would wrongly contain one element.
      if ($num === 0) {
        return array();
      }

      // $replacement: one entry per input string
      if (is_array($replacement) === true) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: non-integer entries fall back to offset 0
      if (is_array($start) === true) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length
      if (!isset($length)) {
        // "not given" has to stay "not given" for every element, so that the scalar branch
        // below replaces up to the end of each string (a length of 0 would only insert)
        $length = array_fill(0, $num, null);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, element by element
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $start, $length);
    }

    // scalar input: only the first entry of an array replacement is used
    if (is_array($replacement) === true) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // split both strings into arrays of single characters
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6419
6420
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case-sensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with
   *                the needle. An empty haystack always yields an empty string.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) === true) {
      // keep everything in front of the suffix
      $keepLength = self::strlen($haystack) - self::strlen($needle);

      // "self::substr()" may report a failure as false; cast so the documented string type always holds
      return (string)self::substr($haystack, 0, $keepLength);
    }

    return $haystack;
  }
6447
6448
  /**
   * Returns the string with the case of every non-whitespace character swapped.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $swapSingleChar = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // unchanged by upper-casing => hand back the lower-case form, otherwise the upper-case form
      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapSingleChar, $str);
  }
6491
6492
  /**
   * Deprecated camelCase alias: forwards all arguments unchanged to "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the "unknown character" substitute.</p>
   * @param bool   $strict    <p>Passed on as the strict flag.</p>
   *
   * @return string
   *
   * @deprecated
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6509
6510
  /**
   * Deprecated camelCase alias: forwards the argument unchanged to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string $str
   *
   * @return string|string[]
   *
   * @deprecated
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6525
6526
  /**
   * Deprecated camelCase alias: forwards the argument unchanged to "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param $str
   *
   * @return string
   *
   * @deprecated
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6541
6542
  /**
   * Deprecated camelCase alias: forwards the argument unchanged to "UTF8::to_utf8()".
   *
   * @see UTF8::to_utf8()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6557
6558
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first; if it is not pure ASCII afterwards, every non-ASCII character is
   * decoded to its code point and replaced via lookup tables loaded with "self::getData()".
   * Characters without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache of the loaded lookup tables, keyed by "code point >> 8"
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "self::clean()" may report a failure as false; cast so that we always work on (and return) a string
    $str = (string)self::clean($str, true, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: in that case keep the
        // current string and let the table lookup below handle it
        if ($transliterated !== false) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }

      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Decode the code point from the lead byte and its continuation bytes.
      // Reset per character, so that a value from a previous iteration can never be reused.
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223) {
        // 2 bytes
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        // 3 bytes
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        // 4 bytes
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        // 5 bytes (legacy form, not part of UTF-8 as defined by RFC 3629)
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        // 6 bytes (legacy form, not part of UTF-8 as defined by RFC 3629)
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // no usable lead byte (stray continuation byte, 254 or 255)
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the tables are split into banks of 256 code points, loaded on first use
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the foreach
    unset($c);

    return implode('', $chars);
  }
6713
6714
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) === true) {
      $converted = array();

      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
6743
6744
  /**
   * Alias: forwards the argument unchanged to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6757
6758
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str) === true) {
      $converted = array();

      foreach ($str as $key => $value) {
        $converted[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // number of bytes in the input
    $max = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $buf = '';
    $i = 0;

    while ($i < $max) {

      $c1 = $str[$i];

      // set when $c1 has to be re-encoded as a two byte UTF-8 sequence
      $needsConversion = false;

      if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

        // how many continuation bytes the lead byte announces (0 => doesn't look like UTF8)
        if ($c1 <= "\xDF") {
          $expected = 1;
        } elseif ($c1 <= "\xEF") {
          $expected = 2;
        } elseif ($c1 <= "\xF7") {
          $expected = 3;
        } else {
          $expected = 0;
        }

        // collect the following bytes as long as they are in the continuation range
        $tail = '';
        for ($j = 1; $j <= $expected; $j++) {
          $next = $i + $j >= $max ? "\x00" : $str[$i + $j];

          if ($next < "\x80" || $next > "\xBF") {
            break;
          }

          $tail .= $next;
        }

        if ($expected > 0 && $j > $expected) { // yeah, almost sure it's UTF8 already
          $buf .= $c1 . $tail;
          $i += $expected;
        } else { // not valid UTF8 - convert it
          $needsConversion = true;
        }

      } elseif (($c1 & "\xC0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
          $needsConversion = true;
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }

      if ($needsConversion === true) {
        $cc1 = self::chr_and_parse_int(ord($c1) / 64) | "\xC0";
        $cc2 = ($c1 & "\x3F") | "\x80";
        $buf .= $cc1 . $cc2;
      }

      $i++;
    }

    // decode unicode escape sequences
    $decodeEscape = function ($match) {
      return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
    };
    $buf = preg_replace_callback('/\\\\u([0-9a-f]{4})/i', $decodeEscape, $buf);

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
6895
6896
  /**
   * Strip whitespace or other characters from beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // explicit (non-empty) character list: strip it from the left side first, then from the right side
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // default: strip the unicode categories "Z" (separators) and "C" (control and other characters)
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
6924
6925
  /**
   * Makes string's first char uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // self::substr() can return false instead of a string; normalise both parts to string
    // so that strtoupper() and the concatenation below always operate on strings.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $remainder = (string)self::substr($str, 1, null, $encoding, $cleanUtf8);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $remainder;
  }
6938
6939
  /**
   * Alias for "UTF8::ucfirst()": upper-cases the first character of a single word.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The word to capitalize.</p>
   * @param string  $encoding  [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   * @param boolean $cleanUtf8 [optional] <p>Passed through to "UTF8::ucfirst()".</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
6954
6955
  /**
   * Uppercase for all words in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must not be capitalized (compared strictly).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Test for emptiness by length, not by truthiness: the string "0" is falsy but not empty.
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    // The pieces are re-joined without glue below, so str_to_words() presumably
    // returns the separators as pieces too - TODO confirm against its implementation.
    foreach (self::str_to_words($str, $charlist) as $word) {

      // Skip empty pieces only; a piece consisting of "0" has to survive.
      if (!isset($word[0])) {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7004
7005
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string stops changing).</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Rewrite "%uXXXX" escapes into hexadecimal html entities, so that the
    // entity decoding in the loop below picks them up.
    // (The unqualified urldecode() calls in this method are PHP's native function.)
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4.
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    do {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($asUtf8, $flags);
      $urlDecoded = urldecode($entityDecoded);
      $str = self::fix_simple_utf8($urlDecoded);

      // Repeat only in multi-decode mode, and only while a pass still changes the string.
    } while ($multi_decode === true && $before !== $str);

    return (string)$str;
  }
7055
7056
  /**
   * Return an array with "urlencoded"-win1252 -> UTF-8
   *
   * The keys are the percent-escapes "%20" .. "%FF", the values are the matching characters.
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars()
  {
    // NOTE(review): the values for %7F, %81, %8D, %8F, %90, %9D, %A0 and %AD appear as empty
    // strings in this listing; they may have held (invisible) literal characters originally
    // - confirm against the upstream source before relying on them.
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (see $WIN1252_TO_UTF8[128]), not a backtick.
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7292
7293
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * Code points that do not fit into a single byte (>= 256) are replaced by "?".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // built once per request, then reused by every later call
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = (string)self::to_utf8($str);

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    // apply the $UTF8_TO_WIN1252 replacement table before the generic byte-wise pass
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte length, regardless of "mbstring.func_overload"
    $len = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    // The string is rewritten in place: $read walks the UTF-8 bytes, $write (never ahead
    // of $read) marks where the next single-byte result is stored.
    $write = 0;
    for ($read = 0; $read < $len; ++$read, ++$write) {
      $highNibble = $str[$read] & "\xF0";

      if ($highNibble === "\xC0" || $highNibble === "\xD0") {
        // 2-byte sequence: 5 payload bits from the lead byte, 6 from the continuation byte
        $lead = ord($str[$read] & "\x1F");
        ++$read;
        $trail = ord($str[$read] & "\x3F");
        $codePoint = ($lead << 6) | $trail;

        $str[$write] = $codePoint < 256 ? self::chr_and_parse_int($codePoint) : '?';
      } elseif ($highNibble === "\xF0" || $highNibble === "\xE0") {
        // 4-byte ("\xF0") and 3-byte ("\xE0") sequences cannot be represented:
        // emit "?" and skip the remaining bytes of the sequence
        $str[$write] = '?';
        $read += ($highNibble === "\xF0") ? 3 : 2;
      } else {
        // every other byte is copied unchanged
        $str[$write] = $str[$read];
      }
    }

    // only the first $write bytes hold the decoded result
    return self::substr($str, 0, $write, '8BIT');
  }
7355
7356
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native conversion, the $CP1252_TO_UTF8 replacement table is applied
   * whenever the result contains a "\xC2" byte.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // built on first use, then reused by every later call
    static $CP1252_TO_UTF8_KEYS_CACHE = null;
    static $CP1252_TO_UTF8_VALUES_CACHE = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // Without a "\xC2" byte the replacement table is not consulted at all
    // (presumably every key of $CP1252_TO_UTF8 starts with "\xC2" - TODO confirm).
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($CP1252_TO_UTF8_KEYS_CACHE === null) {
      $CP1252_TO_UTF8_KEYS_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $CP1252_TO_UTF8_VALUES_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($CP1252_TO_UTF8_KEYS_CACHE, $CP1252_TO_UTF8_VALUES_CACHE, $encoded);
  }
7389
7390
  /**
   * Fix utf8-win1252 chars; plain pass-through to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7403
7404
  /**
   * Returns an array with all utf8 whitespace characters (the class's $WHITESPACE_TABLE).
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. (e-mail address obfuscated in this listing)
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7420
7421
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Suffix appended when the string was actually shortened.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    $words = (int)$words;
    if ($words < 1) {
      return '';
    }

    // optional leading whitespace, then 1..$words runs of "non-whitespace + trailing whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $words . '}/u';
    preg_match($pattern, $str, $matches);

    // shorten only if something matched and the match does not already cover the whole string
    if (isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0])) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7456
7457
  /**
   * Wraps a string to a given number of characters
   *
   * The text is mirrored into a single-byte mask, the native wordwrap() is run on that mask,
   * and the break positions it produces are then replayed on the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // Build the mask: "#" for an existing break, " " for a space, "?" for any other
    // character. $chars keeps the real pieces in the same order.
    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $index => $segment) {

      if ($index) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= (' ' === $char) ? ' ' : '?';
      }
    }

    // the unqualified call is PHP's native wordwrap(), working on the placeholder mask
    $wrappedMask = wordwrap($mask, $width, '#', $cut);

    $strReturn = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== $breakPos = self::strpos($wrappedMask, '#', $breakPos + 1)) {
      // copy the real characters that sit in front of this break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $strReturn .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // a break that stands for an original break or space consumes that piece
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $strReturn .= $break;
    }

    // whatever is left over belongs to the last line
    return $strReturn . implode('', $chars);
  }
7524
7525
  /**
   * Returns an array of Unicode White Space characters (the class's $WHITESPACE list).
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7534
7535
}
7536